Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[cascardo/linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Result of a next-hop reachability check (see rt6_check_neigh()).
 *
 * The failure codes are negative so that rt6_score_route() can return them
 * in place of a (non-negative) route score.
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED		= 1,
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76                                     const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int      ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void             ip6_dst_destroy(struct dst_entry *);
82 static void             ip6_dst_ifdown(struct dst_entry *,
83                                        struct net_device *dev, int how);
84 static int               ip6_dst_gc(struct dst_ops *ops);
85
86 static int              ip6_pkt_discard(struct sk_buff *skb);
87 static int              ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int              ip6_pkt_prohibit(struct sk_buff *skb);
89 static int              ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void             ip6_link_failure(struct sk_buff *skb);
91 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92                                            struct sk_buff *skb, u32 mtu);
93 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94                                         struct sk_buff *skb);
95 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
97
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Helpers used by rt6_route_rcv() to look up and to create the route that
 * corresponds to a received route-information option.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
107
108 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
109 {
110         struct rt6_info *rt = (struct rt6_info *)dst;
111
112         if (rt->rt6i_flags & RTF_CACHE)
113                 return NULL;
114         else
115                 return dst_cow_metrics_generic(dst, old);
116 }
117
118 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
119                                              struct sk_buff *skb,
120                                              const void *daddr)
121 {
122         struct in6_addr *p = &rt->rt6i_gateway;
123
124         if (!ipv6_addr_any(p))
125                 return (const void *) p;
126         else if (skb)
127                 return &ipv6_hdr(skb)->daddr;
128         return daddr;
129 }
130
131 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
132                                           struct sk_buff *skb,
133                                           const void *daddr)
134 {
135         struct rt6_info *rt = (struct rt6_info *) dst;
136         struct neighbour *n;
137
138         daddr = choose_neigh_daddr(rt, skb, daddr);
139         n = __ipv6_neigh_lookup(dst->dev, daddr);
140         if (n)
141                 return n;
142         return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144
145 static struct dst_ops ip6_dst_ops_template = {
146         .family                 =       AF_INET6,
147         .gc                     =       ip6_dst_gc,
148         .gc_thresh              =       1024,
149         .check                  =       ip6_dst_check,
150         .default_advmss         =       ip6_default_advmss,
151         .mtu                    =       ip6_mtu,
152         .cow_metrics            =       ipv6_cow_metrics,
153         .destroy                =       ip6_dst_destroy,
154         .ifdown                 =       ip6_dst_ifdown,
155         .negative_advice        =       ip6_negative_advice,
156         .link_failure           =       ip6_link_failure,
157         .update_pmtu            =       ip6_rt_update_pmtu,
158         .redirect               =       rt6_do_redirect,
159         .local_out              =       __ip6_local_out,
160         .neigh_lookup           =       ip6_neigh_lookup,
161 };
162
163 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
164 {
165         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166
167         return mtu ? : dst->dev->mtu;
168 }
169
170 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
171                                          struct sk_buff *skb, u32 mtu)
172 {
173 }
174
/* dst_ops->redirect hook for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* redirects are ignored */
}
179
180 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
181                                          unsigned long old)
182 {
183         return NULL;
184 }
185
186 static struct dst_ops ip6_dst_blackhole_ops = {
187         .family                 =       AF_INET6,
188         .destroy                =       ip6_dst_destroy,
189         .check                  =       ip6_dst_check,
190         .mtu                    =       ip6_blackhole_mtu,
191         .default_advmss         =       ip6_default_advmss,
192         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
193         .redirect               =       ip6_rt_blackhole_redirect,
194         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
195         .neigh_lookup           =       ip6_neigh_lookup,
196 };
197
198 static const u32 ip6_template_metrics[RTAX_MAX] = {
199         [RTAX_HOPLIMIT - 1] = 0,
200 };
201
202 static const struct rt6_info ip6_null_entry_template = {
203         .dst = {
204                 .__refcnt       = ATOMIC_INIT(1),
205                 .__use          = 1,
206                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
207                 .error          = -ENETUNREACH,
208                 .input          = ip6_pkt_discard,
209                 .output         = ip6_pkt_discard_out,
210         },
211         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
212         .rt6i_protocol  = RTPROT_KERNEL,
213         .rt6i_metric    = ~(u32) 0,
214         .rt6i_ref       = ATOMIC_INIT(1),
215 };
216
217 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
218
219 static const struct rt6_info ip6_prohibit_entry_template = {
220         .dst = {
221                 .__refcnt       = ATOMIC_INIT(1),
222                 .__use          = 1,
223                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
224                 .error          = -EACCES,
225                 .input          = ip6_pkt_prohibit,
226                 .output         = ip6_pkt_prohibit_out,
227         },
228         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
229         .rt6i_protocol  = RTPROT_KERNEL,
230         .rt6i_metric    = ~(u32) 0,
231         .rt6i_ref       = ATOMIC_INIT(1),
232 };
233
234 static const struct rt6_info ip6_blk_hole_entry_template = {
235         .dst = {
236                 .__refcnt       = ATOMIC_INIT(1),
237                 .__use          = 1,
238                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
239                 .error          = -EINVAL,
240                 .input          = dst_discard,
241                 .output         = dst_discard_sk,
242         },
243         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
244         .rt6i_protocol  = RTPROT_KERNEL,
245         .rt6i_metric    = ~(u32) 0,
246         .rt6i_ref       = ATOMIC_INIT(1),
247 };
248
249 #endif
250
251 /* allocate dst with ip6_dst_ops */
252 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
253                                              struct net_device *dev,
254                                              int flags,
255                                              struct fib6_table *table)
256 {
257         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
258                                         0, DST_OBSOLETE_FORCE_CHK, flags);
259
260         if (rt) {
261                 struct dst_entry *dst = &rt->dst;
262
263                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
264                 INIT_LIST_HEAD(&rt->rt6i_siblings);
265         }
266         return rt;
267 }
268
269 static void ip6_dst_destroy(struct dst_entry *dst)
270 {
271         struct rt6_info *rt = (struct rt6_info *)dst;
272         struct inet6_dev *idev = rt->rt6i_idev;
273         struct dst_entry *from = dst->from;
274
275         dst_destroy_metrics_generic(dst);
276
277         if (idev) {
278                 rt->rt6i_idev = NULL;
279                 in6_dev_put(idev);
280         }
281
282         dst->from = NULL;
283         dst_release(from);
284 }
285
286 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
287                            int how)
288 {
289         struct rt6_info *rt = (struct rt6_info *)dst;
290         struct inet6_dev *idev = rt->rt6i_idev;
291         struct net_device *loopback_dev =
292                 dev_net(dev)->loopback_dev;
293
294         if (dev != loopback_dev) {
295                 if (idev && idev->dev == dev) {
296                         struct inet6_dev *loopback_idev =
297                                 in6_dev_get(loopback_dev);
298                         if (loopback_idev) {
299                                 rt->rt6i_idev = loopback_idev;
300                                 in6_dev_put(idev);
301                         }
302                 }
303         }
304 }
305
306 static bool rt6_check_expired(const struct rt6_info *rt)
307 {
308         if (rt->rt6i_flags & RTF_EXPIRES) {
309                 if (time_after(jiffies, rt->dst.expires))
310                         return true;
311         } else if (rt->dst.from) {
312                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
313         }
314         return false;
315 }
316
317 /* Multipath route selection:
318  *   Hash based function using packet header and flowlabel.
319  * Adapted from fib_info_hashfn()
320  */
321 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
322                                const struct flowi6 *fl6)
323 {
324         unsigned int val = fl6->flowi6_proto;
325
326         val ^= ipv6_addr_hash(&fl6->daddr);
327         val ^= ipv6_addr_hash(&fl6->saddr);
328
329         /* Work only if this not encapsulated */
330         switch (fl6->flowi6_proto) {
331         case IPPROTO_UDP:
332         case IPPROTO_TCP:
333         case IPPROTO_SCTP:
334                 val ^= (__force u16)fl6->fl6_sport;
335                 val ^= (__force u16)fl6->fl6_dport;
336                 break;
337
338         case IPPROTO_ICMPV6:
339                 val ^= (__force u16)fl6->fl6_icmp_type;
340                 val ^= (__force u16)fl6->fl6_icmp_code;
341                 break;
342         }
343         /* RFC6438 recommands to use flowlabel */
344         val ^= (__force u32)fl6->flowlabel;
345
346         /* Perhaps, we need to tune, this function? */
347         val = val ^ (val >> 7) ^ (val >> 12);
348         return val % candidate_count;
349 }
350
351 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
352                                              struct flowi6 *fl6, int oif,
353                                              int strict)
354 {
355         struct rt6_info *sibling, *next_sibling;
356         int route_choosen;
357
358         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
359         /* Don't change the route, if route_choosen == 0
360          * (siblings does not include ourself)
361          */
362         if (route_choosen)
363                 list_for_each_entry_safe(sibling, next_sibling,
364                                 &match->rt6i_siblings, rt6i_siblings) {
365                         route_choosen--;
366                         if (route_choosen == 0) {
367                                 if (rt6_score_route(sibling, oif, strict) < 0)
368                                         break;
369                                 match = sibling;
370                                 break;
371                         }
372                 }
373         return match;
374 }
375
376 /*
377  *      Route lookup. Any table->tb6_lock is implied.
378  */
379
380 static inline struct rt6_info *rt6_device_match(struct net *net,
381                                                     struct rt6_info *rt,
382                                                     const struct in6_addr *saddr,
383                                                     int oif,
384                                                     int flags)
385 {
386         struct rt6_info *local = NULL;
387         struct rt6_info *sprt;
388
389         if (!oif && ipv6_addr_any(saddr))
390                 goto out;
391
392         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
393                 struct net_device *dev = sprt->dst.dev;
394
395                 if (oif) {
396                         if (dev->ifindex == oif)
397                                 return sprt;
398                         if (dev->flags & IFF_LOOPBACK) {
399                                 if (!sprt->rt6i_idev ||
400                                     sprt->rt6i_idev->dev->ifindex != oif) {
401                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
402                                                 continue;
403                                         if (local && (!oif ||
404                                                       local->rt6i_idev->dev->ifindex == oif))
405                                                 continue;
406                                 }
407                                 local = sprt;
408                         }
409                 } else {
410                         if (ipv6_chk_addr(net, saddr, dev,
411                                           flags & RT6_LOOKUP_F_IFACE))
412                                 return sprt;
413                 }
414         }
415
416         if (oif) {
417                 if (local)
418                         return local;
419
420                 if (flags & RT6_LOOKUP_F_IFACE)
421                         return net->ipv6.ip6_null_entry;
422         }
423 out:
424         return rt;
425 }
426
427 #ifdef CONFIG_IPV6_ROUTER_PREF
428 struct __rt6_probe_work {
429         struct work_struct work;
430         struct in6_addr target;
431         struct net_device *dev;
432 };
433
434 static void rt6_probe_deferred(struct work_struct *w)
435 {
436         struct in6_addr mcaddr;
437         struct __rt6_probe_work *work =
438                 container_of(w, struct __rt6_probe_work, work);
439
440         addrconf_addr_solict_mult(&work->target, &mcaddr);
441         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
442         dev_put(work->dev);
443         kfree(work);
444 }
445
446 static void rt6_probe(struct rt6_info *rt)
447 {
448         struct neighbour *neigh;
449         /*
450          * Okay, this does not seem to be appropriate
451          * for now, however, we need to check if it
452          * is really so; aka Router Reachability Probing.
453          *
454          * Router Reachability Probe MUST be rate-limited
455          * to no more than one per minute.
456          */
457         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
458                 return;
459         rcu_read_lock_bh();
460         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
461         if (neigh) {
462                 write_lock(&neigh->lock);
463                 if (neigh->nud_state & NUD_VALID)
464                         goto out;
465         }
466
467         if (!neigh ||
468             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
469                 struct __rt6_probe_work *work;
470
471                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
472
473                 if (neigh && work)
474                         __neigh_set_probe_once(neigh);
475
476                 if (neigh)
477                         write_unlock(&neigh->lock);
478
479                 if (work) {
480                         INIT_WORK(&work->work, rt6_probe_deferred);
481                         work->target = rt->rt6i_gateway;
482                         dev_hold(rt->dst.dev);
483                         work->dev = rt->dst.dev;
484                         schedule_work(&work->work);
485                 }
486         } else {
487 out:
488                 write_unlock(&neigh->lock);
489         }
490         rcu_read_unlock_bh();
491 }
492 #else
/* Stub used when CONFIG_IPV6_ROUTER_PREF is not set: no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
496 #endif
497
498 /*
499  * Default Router Selection (RFC 2461 6.3.6)
500  */
501 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
502 {
503         struct net_device *dev = rt->dst.dev;
504         if (!oif || dev->ifindex == oif)
505                 return 2;
506         if ((dev->flags & IFF_LOOPBACK) &&
507             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
508                 return 1;
509         return 0;
510 }
511
512 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
513 {
514         struct neighbour *neigh;
515         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
516
517         if (rt->rt6i_flags & RTF_NONEXTHOP ||
518             !(rt->rt6i_flags & RTF_GATEWAY))
519                 return RT6_NUD_SUCCEED;
520
521         rcu_read_lock_bh();
522         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
523         if (neigh) {
524                 read_lock(&neigh->lock);
525                 if (neigh->nud_state & NUD_VALID)
526                         ret = RT6_NUD_SUCCEED;
527 #ifdef CONFIG_IPV6_ROUTER_PREF
528                 else if (!(neigh->nud_state & NUD_FAILED))
529                         ret = RT6_NUD_SUCCEED;
530                 else
531                         ret = RT6_NUD_FAIL_PROBE;
532 #endif
533                 read_unlock(&neigh->lock);
534         } else {
535                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
536                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
537         }
538         rcu_read_unlock_bh();
539
540         return ret;
541 }
542
543 static int rt6_score_route(struct rt6_info *rt, int oif,
544                            int strict)
545 {
546         int m;
547
548         m = rt6_check_dev(rt, oif);
549         if (!m && (strict & RT6_LOOKUP_F_IFACE))
550                 return RT6_NUD_FAIL_HARD;
551 #ifdef CONFIG_IPV6_ROUTER_PREF
552         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
553 #endif
554         if (strict & RT6_LOOKUP_F_REACHABLE) {
555                 int n = rt6_check_neigh(rt);
556                 if (n < 0)
557                         return n;
558         }
559         return m;
560 }
561
562 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
563                                    int *mpri, struct rt6_info *match,
564                                    bool *do_rr)
565 {
566         int m;
567         bool match_do_rr = false;
568
569         if (rt6_check_expired(rt))
570                 goto out;
571
572         m = rt6_score_route(rt, oif, strict);
573         if (m == RT6_NUD_FAIL_DO_RR) {
574                 match_do_rr = true;
575                 m = 0; /* lowest valid score */
576         } else if (m == RT6_NUD_FAIL_HARD) {
577                 goto out;
578         }
579
580         if (strict & RT6_LOOKUP_F_REACHABLE)
581                 rt6_probe(rt);
582
583         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
584         if (m > *mpri) {
585                 *do_rr = match_do_rr;
586                 *mpri = m;
587                 match = rt;
588         }
589 out:
590         return match;
591 }
592
593 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
594                                      struct rt6_info *rr_head,
595                                      u32 metric, int oif, int strict,
596                                      bool *do_rr)
597 {
598         struct rt6_info *rt, *match, *cont;
599         int mpri = -1;
600
601         match = NULL;
602         cont = NULL;
603         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
604                 if (rt->rt6i_metric != metric) {
605                         cont = rt;
606                         break;
607                 }
608
609                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
610         }
611
612         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
613                 if (rt->rt6i_metric != metric) {
614                         cont = rt;
615                         break;
616                 }
617
618                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
619         }
620
621         if (match || !cont)
622                 return match;
623
624         for (rt = cont; rt; rt = rt->dst.rt6_next)
625                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
626
627         return match;
628 }
629
630 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
631 {
632         struct rt6_info *match, *rt0;
633         struct net *net;
634         bool do_rr = false;
635
636         rt0 = fn->rr_ptr;
637         if (!rt0)
638                 fn->rr_ptr = rt0 = fn->leaf;
639
640         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
641                              &do_rr);
642
643         if (do_rr) {
644                 struct rt6_info *next = rt0->dst.rt6_next;
645
646                 /* no entries matched; do round-robin */
647                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
648                         next = fn->leaf;
649
650                 if (next != rt0)
651                         fn->rr_ptr = next;
652         }
653
654         net = dev_net(rt0->dst.dev);
655         return match ? match : net->ipv6.ip6_null_entry;
656 }
657
658 #ifdef CONFIG_IPV6_ROUTE_INFO
659 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
660                   const struct in6_addr *gwaddr)
661 {
662         struct net *net = dev_net(dev);
663         struct route_info *rinfo = (struct route_info *) opt;
664         struct in6_addr prefix_buf, *prefix;
665         unsigned int pref;
666         unsigned long lifetime;
667         struct rt6_info *rt;
668
669         if (len < sizeof(struct route_info)) {
670                 return -EINVAL;
671         }
672
673         /* Sanity check for prefix_len and length */
674         if (rinfo->length > 3) {
675                 return -EINVAL;
676         } else if (rinfo->prefix_len > 128) {
677                 return -EINVAL;
678         } else if (rinfo->prefix_len > 64) {
679                 if (rinfo->length < 2) {
680                         return -EINVAL;
681                 }
682         } else if (rinfo->prefix_len > 0) {
683                 if (rinfo->length < 1) {
684                         return -EINVAL;
685                 }
686         }
687
688         pref = rinfo->route_pref;
689         if (pref == ICMPV6_ROUTER_PREF_INVALID)
690                 return -EINVAL;
691
692         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
693
694         if (rinfo->length == 3)
695                 prefix = (struct in6_addr *)rinfo->prefix;
696         else {
697                 /* this function is safe */
698                 ipv6_addr_prefix(&prefix_buf,
699                                  (struct in6_addr *)rinfo->prefix,
700                                  rinfo->prefix_len);
701                 prefix = &prefix_buf;
702         }
703
704         if (rinfo->prefix_len == 0)
705                 rt = rt6_get_dflt_router(gwaddr, dev);
706         else
707                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
708                                         gwaddr, dev->ifindex);
709
710         if (rt && !lifetime) {
711                 ip6_del_rt(rt);
712                 rt = NULL;
713         }
714
715         if (!rt && lifetime)
716                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
717                                         pref);
718         else if (rt)
719                 rt->rt6i_flags = RTF_ROUTEINFO |
720                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
721
722         if (rt) {
723                 if (!addrconf_finite_timeout(lifetime))
724                         rt6_clean_expires(rt);
725                 else
726                         rt6_set_expires(rt, jiffies + HZ * lifetime);
727
728                 ip6_rt_put(rt);
729         }
730         return 0;
731 }
732 #endif
733
734 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
735                                         struct in6_addr *saddr)
736 {
737         struct fib6_node *pn;
738         while (1) {
739                 if (fn->fn_flags & RTN_TL_ROOT)
740                         return NULL;
741                 pn = fn->parent;
742                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
743                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
744                 else
745                         fn = pn;
746                 if (fn->fn_flags & RTN_RTINFO)
747                         return fn;
748         }
749 }
750
751 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
752                                              struct fib6_table *table,
753                                              struct flowi6 *fl6, int flags)
754 {
755         struct fib6_node *fn;
756         struct rt6_info *rt;
757
758         read_lock_bh(&table->tb6_lock);
759         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
760 restart:
761         rt = fn->leaf;
762         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
763         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
764                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
765         if (rt == net->ipv6.ip6_null_entry) {
766                 fn = fib6_backtrack(fn, &fl6->saddr);
767                 if (fn)
768                         goto restart;
769         }
770         dst_use(&rt->dst, jiffies);
771         read_unlock_bh(&table->tb6_lock);
772         return rt;
773
774 }
775
776 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
777                                     int flags)
778 {
779         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
780 }
781 EXPORT_SYMBOL_GPL(ip6_route_lookup);
782
783 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
784                             const struct in6_addr *saddr, int oif, int strict)
785 {
786         struct flowi6 fl6 = {
787                 .flowi6_oif = oif,
788                 .daddr = *daddr,
789         };
790         struct dst_entry *dst;
791         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
792
793         if (saddr) {
794                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
795                 flags |= RT6_LOOKUP_F_HAS_SADDR;
796         }
797
798         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
799         if (dst->error == 0)
800                 return (struct rt6_info *) dst;
801
802         dst_release(dst);
803
804         return NULL;
805 }
806 EXPORT_SYMBOL(rt6_lookup);
807
808 /* ip6_ins_rt is called with FREE table->tb6_lock.
809    It takes new route entry, the addition fails by any reason the
810    route is freed. In any case, if caller does not hold it, it may
811    be destroyed.
812  */
813
814 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
815                         struct mx6_config *mxc)
816 {
817         int err;
818         struct fib6_table *table;
819
820         table = rt->rt6i_table;
821         write_lock_bh(&table->tb6_lock);
822         err = fib6_add(&table->tb6_root, rt, info, mxc);
823         write_unlock_bh(&table->tb6_lock);
824
825         return err;
826 }
827
828 int ip6_ins_rt(struct rt6_info *rt)
829 {
830         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
831         struct mx6_config mxc = { .mx = NULL, };
832
833         return __ip6_ins_rt(rt, &info, &mxc);
834 }
835
836 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
837                                       const struct in6_addr *daddr,
838                                       const struct in6_addr *saddr)
839 {
840         struct rt6_info *rt;
841
842         /*
843          *      Clone the route.
844          */
845
846         rt = ip6_rt_copy(ort, daddr);
847
848         if (rt) {
849                 if (ort->rt6i_dst.plen != 128 &&
850                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
851                         rt->rt6i_flags |= RTF_ANYCAST;
852
853                 rt->rt6i_flags |= RTF_CACHE;
854
855 #ifdef CONFIG_IPV6_SUBTREES
856                 if (rt->rt6i_src.plen && saddr) {
857                         rt->rt6i_src.addr = *saddr;
858                         rt->rt6i_src.plen = 128;
859                 }
860 #endif
861         }
862
863         return rt;
864 }
865
866 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
867                                         const struct in6_addr *daddr)
868 {
869         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
870
871         if (rt)
872                 rt->rt6i_flags |= RTF_CACHE;
873         return rt;
874 }
875
876 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
877                                       struct flowi6 *fl6, int flags)
878 {
879         struct fib6_node *fn, *saved_fn;
880         struct rt6_info *rt, *nrt;
881         int strict = 0;
882         int attempts = 3;
883         int err;
884
885         strict |= flags & RT6_LOOKUP_F_IFACE;
886         if (net->ipv6.devconf_all->forwarding == 0)
887                 strict |= RT6_LOOKUP_F_REACHABLE;
888
889 redo_fib6_lookup_lock:
890         read_lock_bh(&table->tb6_lock);
891
892         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
893         saved_fn = fn;
894
895 redo_rt6_select:
896         rt = rt6_select(fn, oif, strict);
897         if (rt->rt6i_nsiblings)
898                 rt = rt6_multipath_select(rt, fl6, oif, strict);
899         if (rt == net->ipv6.ip6_null_entry) {
900                 fn = fib6_backtrack(fn, &fl6->saddr);
901                 if (fn)
902                         goto redo_rt6_select;
903                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
904                         /* also consider unreachable route */
905                         strict &= ~RT6_LOOKUP_F_REACHABLE;
906                         fn = saved_fn;
907                         goto redo_rt6_select;
908                 } else {
909                         dst_hold(&rt->dst);
910                         read_unlock_bh(&table->tb6_lock);
911                         goto out2;
912                 }
913         }
914
915         dst_hold(&rt->dst);
916         read_unlock_bh(&table->tb6_lock);
917
918         if (rt->rt6i_flags & RTF_CACHE)
919                 goto out2;
920
921         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
922                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
923         else if (!(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL))
924                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
925         else
926                 goto out2;
927
928         ip6_rt_put(rt);
929         rt = nrt ? : net->ipv6.ip6_null_entry;
930
931         dst_hold(&rt->dst);
932         if (nrt) {
933                 err = ip6_ins_rt(nrt);
934                 if (!err)
935                         goto out2;
936         }
937
938         if (--attempts <= 0)
939                 goto out2;
940
941         /*
942          * Race condition! In the gap, when table->tb6_lock was
943          * released someone could insert this route.  Relookup.
944          */
945         ip6_rt_put(rt);
946         goto redo_fib6_lookup_lock;
947
948 out2:
949         rt6_dst_from_metrics_check(rt);
950         rt->dst.lastuse = jiffies;
951         rt->dst.__use++;
952
953         return rt;
954 }
955
956 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
957                                             struct flowi6 *fl6, int flags)
958 {
959         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
960 }
961
962 static struct dst_entry *ip6_route_input_lookup(struct net *net,
963                                                 struct net_device *dev,
964                                                 struct flowi6 *fl6, int flags)
965 {
966         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
967                 flags |= RT6_LOOKUP_F_IFACE;
968
969         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
970 }
971
972 void ip6_route_input(struct sk_buff *skb)
973 {
974         const struct ipv6hdr *iph = ipv6_hdr(skb);
975         struct net *net = dev_net(skb->dev);
976         int flags = RT6_LOOKUP_F_HAS_SADDR;
977         struct flowi6 fl6 = {
978                 .flowi6_iif = skb->dev->ifindex,
979                 .daddr = iph->daddr,
980                 .saddr = iph->saddr,
981                 .flowlabel = ip6_flowinfo(iph),
982                 .flowi6_mark = skb->mark,
983                 .flowi6_proto = iph->nexthdr,
984         };
985
986         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
987 }
988
989 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
990                                              struct flowi6 *fl6, int flags)
991 {
992         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
993 }
994
995 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
996                                     struct flowi6 *fl6)
997 {
998         int flags = 0;
999
1000         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1001
1002         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1003                 flags |= RT6_LOOKUP_F_IFACE;
1004
1005         if (!ipv6_addr_any(&fl6->saddr))
1006                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1007         else if (sk)
1008                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1009
1010         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1011 }
1012 EXPORT_SYMBOL(ip6_route_output);
1013
1014 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1015 {
1016         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1017         struct dst_entry *new = NULL;
1018
1019         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1020         if (rt) {
1021                 new = &rt->dst;
1022
1023                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1024
1025                 new->__use = 1;
1026                 new->input = dst_discard;
1027                 new->output = dst_discard_sk;
1028
1029                 if (dst_metrics_read_only(&ort->dst))
1030                         new->_metrics = ort->dst._metrics;
1031                 else
1032                         dst_copy_metrics(new, &ort->dst);
1033                 rt->rt6i_idev = ort->rt6i_idev;
1034                 if (rt->rt6i_idev)
1035                         in6_dev_hold(rt->rt6i_idev);
1036
1037                 rt->rt6i_gateway = ort->rt6i_gateway;
1038                 rt->rt6i_flags = ort->rt6i_flags;
1039                 rt->rt6i_metric = 0;
1040
1041                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1042 #ifdef CONFIG_IPV6_SUBTREES
1043                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1044 #endif
1045
1046                 dst_free(new);
1047         }
1048
1049         dst_release(dst_orig);
1050         return new ? new : ERR_PTR(-ENOMEM);
1051 }
1052
1053 /*
1054  *      Destination cache support functions
1055  */
1056
1057 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1058 {
1059         if (rt->dst.from &&
1060             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1061                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1062 }
1063
1064 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1065 {
1066         struct rt6_info *rt;
1067
1068         rt = (struct rt6_info *) dst;
1069
1070         /* All IPV6 dsts are created with ->obsolete set to the value
1071          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1072          * into this function always.
1073          */
1074         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1075                 return NULL;
1076
1077         if (rt6_check_expired(rt))
1078                 return NULL;
1079
1080         rt6_dst_from_metrics_check(rt);
1081
1082         return dst;
1083 }
1084
1085 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1086 {
1087         struct rt6_info *rt = (struct rt6_info *) dst;
1088
1089         if (rt) {
1090                 if (rt->rt6i_flags & RTF_CACHE) {
1091                         if (rt6_check_expired(rt)) {
1092                                 ip6_del_rt(rt);
1093                                 dst = NULL;
1094                         }
1095                 } else {
1096                         dst_release(dst);
1097                         dst = NULL;
1098                 }
1099         }
1100         return dst;
1101 }
1102
1103 static void ip6_link_failure(struct sk_buff *skb)
1104 {
1105         struct rt6_info *rt;
1106
1107         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1108
1109         rt = (struct rt6_info *) skb_dst(skb);
1110         if (rt) {
1111                 if (rt->rt6i_flags & RTF_CACHE) {
1112                         dst_hold(&rt->dst);
1113                         if (ip6_del_rt(rt))
1114                                 dst_free(&rt->dst);
1115                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1116                         rt->rt6i_node->fn_sernum = -1;
1117                 }
1118         }
1119 }
1120
1121 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1122                                struct sk_buff *skb, u32 mtu)
1123 {
1124         struct rt6_info *rt6 = (struct rt6_info *)dst;
1125
1126         dst_confirm(dst);
1127         if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
1128                 struct net *net = dev_net(dst->dev);
1129
1130                 rt6->rt6i_flags |= RTF_MODIFIED;
1131                 if (mtu < IPV6_MIN_MTU)
1132                         mtu = IPV6_MIN_MTU;
1133
1134                 rt6->rt6i_pmtu = mtu;
1135                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1136         }
1137 }
1138
1139 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1140                      int oif, u32 mark)
1141 {
1142         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1143         struct dst_entry *dst;
1144         struct flowi6 fl6;
1145
1146         memset(&fl6, 0, sizeof(fl6));
1147         fl6.flowi6_oif = oif;
1148         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1149         fl6.daddr = iph->daddr;
1150         fl6.saddr = iph->saddr;
1151         fl6.flowlabel = ip6_flowinfo(iph);
1152
1153         dst = ip6_route_output(net, NULL, &fl6);
1154         if (!dst->error)
1155                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1156         dst_release(dst);
1157 }
1158 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1159
1160 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1161 {
1162         ip6_update_pmtu(skb, sock_net(sk), mtu,
1163                         sk->sk_bound_dev_if, sk->sk_mark);
1164 }
1165 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1166
1167 /* Handle redirects */
1168 struct ip6rd_flowi {
1169         struct flowi6 fl6;
1170         struct in6_addr gateway;
1171 };
1172
1173 static struct rt6_info *__ip6_route_redirect(struct net *net,
1174                                              struct fib6_table *table,
1175                                              struct flowi6 *fl6,
1176                                              int flags)
1177 {
1178         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1179         struct rt6_info *rt;
1180         struct fib6_node *fn;
1181
1182         /* Get the "current" route for this destination and
1183          * check if the redirect has come from approriate router.
1184          *
1185          * RFC 4861 specifies that redirects should only be
1186          * accepted if they come from the nexthop to the target.
1187          * Due to the way the routes are chosen, this notion
1188          * is a bit fuzzy and one might need to check all possible
1189          * routes.
1190          */
1191
1192         read_lock_bh(&table->tb6_lock);
1193         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1194 restart:
1195         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1196                 if (rt6_check_expired(rt))
1197                         continue;
1198                 if (rt->dst.error)
1199                         break;
1200                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1201                         continue;
1202                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1203                         continue;
1204                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1205                         continue;
1206                 break;
1207         }
1208
1209         if (!rt)
1210                 rt = net->ipv6.ip6_null_entry;
1211         else if (rt->dst.error) {
1212                 rt = net->ipv6.ip6_null_entry;
1213                 goto out;
1214         }
1215
1216         if (rt == net->ipv6.ip6_null_entry) {
1217                 fn = fib6_backtrack(fn, &fl6->saddr);
1218                 if (fn)
1219                         goto restart;
1220         }
1221
1222 out:
1223         dst_hold(&rt->dst);
1224
1225         read_unlock_bh(&table->tb6_lock);
1226
1227         return rt;
1228 };
1229
1230 static struct dst_entry *ip6_route_redirect(struct net *net,
1231                                         const struct flowi6 *fl6,
1232                                         const struct in6_addr *gateway)
1233 {
1234         int flags = RT6_LOOKUP_F_HAS_SADDR;
1235         struct ip6rd_flowi rdfl;
1236
1237         rdfl.fl6 = *fl6;
1238         rdfl.gateway = *gateway;
1239
1240         return fib6_rule_lookup(net, &rdfl.fl6,
1241                                 flags, __ip6_route_redirect);
1242 }
1243
1244 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1245 {
1246         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1247         struct dst_entry *dst;
1248         struct flowi6 fl6;
1249
1250         memset(&fl6, 0, sizeof(fl6));
1251         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1252         fl6.flowi6_oif = oif;
1253         fl6.flowi6_mark = mark;
1254         fl6.daddr = iph->daddr;
1255         fl6.saddr = iph->saddr;
1256         fl6.flowlabel = ip6_flowinfo(iph);
1257
1258         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1259         rt6_do_redirect(dst, NULL, skb);
1260         dst_release(dst);
1261 }
1262 EXPORT_SYMBOL_GPL(ip6_redirect);
1263
1264 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1265                             u32 mark)
1266 {
1267         const struct ipv6hdr *iph = ipv6_hdr(skb);
1268         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1269         struct dst_entry *dst;
1270         struct flowi6 fl6;
1271
1272         memset(&fl6, 0, sizeof(fl6));
1273         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1274         fl6.flowi6_oif = oif;
1275         fl6.flowi6_mark = mark;
1276         fl6.daddr = msg->dest;
1277         fl6.saddr = iph->daddr;
1278
1279         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1280         rt6_do_redirect(dst, NULL, skb);
1281         dst_release(dst);
1282 }
1283
1284 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1285 {
1286         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1287 }
1288 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1289
1290 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1291 {
1292         struct net_device *dev = dst->dev;
1293         unsigned int mtu = dst_mtu(dst);
1294         struct net *net = dev_net(dev);
1295
1296         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1297
1298         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1299                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1300
1301         /*
1302          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1303          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1304          * IPV6_MAXPLEN is also valid and means: "any MSS,
1305          * rely only on pmtu discovery"
1306          */
1307         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1308                 mtu = IPV6_MAXPLEN;
1309         return mtu;
1310 }
1311
1312 static unsigned int ip6_mtu(const struct dst_entry *dst)
1313 {
1314         const struct rt6_info *rt = (const struct rt6_info *)dst;
1315         unsigned int mtu = rt->rt6i_pmtu;
1316         struct inet6_dev *idev;
1317
1318         if (mtu)
1319                 goto out;
1320
1321         mtu = dst_metric_raw(dst, RTAX_MTU);
1322         if (mtu)
1323                 goto out;
1324
1325         mtu = IPV6_MIN_MTU;
1326
1327         rcu_read_lock();
1328         idev = __in6_dev_get(dst->dev);
1329         if (idev)
1330                 mtu = idev->cnf.mtu6;
1331         rcu_read_unlock();
1332
1333 out:
1334         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1335 }
1336
1337 static struct dst_entry *icmp6_dst_gc_list;
1338 static DEFINE_SPINLOCK(icmp6_dst_lock);
1339
1340 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1341                                   struct flowi6 *fl6)
1342 {
1343         struct dst_entry *dst;
1344         struct rt6_info *rt;
1345         struct inet6_dev *idev = in6_dev_get(dev);
1346         struct net *net = dev_net(dev);
1347
1348         if (unlikely(!idev))
1349                 return ERR_PTR(-ENODEV);
1350
1351         rt = ip6_dst_alloc(net, dev, 0, NULL);
1352         if (unlikely(!rt)) {
1353                 in6_dev_put(idev);
1354                 dst = ERR_PTR(-ENOMEM);
1355                 goto out;
1356         }
1357
1358         rt->dst.flags |= DST_HOST;
1359         rt->dst.output  = ip6_output;
1360         atomic_set(&rt->dst.__refcnt, 1);
1361         rt->rt6i_gateway  = fl6->daddr;
1362         rt->rt6i_dst.addr = fl6->daddr;
1363         rt->rt6i_dst.plen = 128;
1364         rt->rt6i_idev     = idev;
1365         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1366
1367         spin_lock_bh(&icmp6_dst_lock);
1368         rt->dst.next = icmp6_dst_gc_list;
1369         icmp6_dst_gc_list = &rt->dst;
1370         spin_unlock_bh(&icmp6_dst_lock);
1371
1372         fib6_force_start_gc(net);
1373
1374         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1375
1376 out:
1377         return dst;
1378 }
1379
1380 int icmp6_dst_gc(void)
1381 {
1382         struct dst_entry *dst, **pprev;
1383         int more = 0;
1384
1385         spin_lock_bh(&icmp6_dst_lock);
1386         pprev = &icmp6_dst_gc_list;
1387
1388         while ((dst = *pprev) != NULL) {
1389                 if (!atomic_read(&dst->__refcnt)) {
1390                         *pprev = dst->next;
1391                         dst_free(dst);
1392                 } else {
1393                         pprev = &dst->next;
1394                         ++more;
1395                 }
1396         }
1397
1398         spin_unlock_bh(&icmp6_dst_lock);
1399
1400         return more;
1401 }
1402
1403 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1404                             void *arg)
1405 {
1406         struct dst_entry *dst, **pprev;
1407
1408         spin_lock_bh(&icmp6_dst_lock);
1409         pprev = &icmp6_dst_gc_list;
1410         while ((dst = *pprev) != NULL) {
1411                 struct rt6_info *rt = (struct rt6_info *) dst;
1412                 if (func(rt, arg)) {
1413                         *pprev = dst->next;
1414                         dst_free(dst);
1415                 } else {
1416                         pprev = &dst->next;
1417                 }
1418         }
1419         spin_unlock_bh(&icmp6_dst_lock);
1420 }
1421
1422 static int ip6_dst_gc(struct dst_ops *ops)
1423 {
1424         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1425         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1426         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1427         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1428         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1429         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1430         int entries;
1431
1432         entries = dst_entries_get_fast(ops);
1433         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1434             entries <= rt_max_size)
1435                 goto out;
1436
1437         net->ipv6.ip6_rt_gc_expire++;
1438         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1439         entries = dst_entries_get_slow(ops);
1440         if (entries < ops->gc_thresh)
1441                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1442 out:
1443         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1444         return entries > rt_max_size;
1445 }
1446
1447 static int ip6_convert_metrics(struct mx6_config *mxc,
1448                                const struct fib6_config *cfg)
1449 {
1450         struct nlattr *nla;
1451         int remaining;
1452         u32 *mp;
1453
1454         if (!cfg->fc_mx)
1455                 return 0;
1456
1457         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1458         if (unlikely(!mp))
1459                 return -ENOMEM;
1460
1461         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1462                 int type = nla_type(nla);
1463
1464                 if (type) {
1465                         u32 val;
1466
1467                         if (unlikely(type > RTAX_MAX))
1468                                 goto err;
1469                         if (type == RTAX_CC_ALGO) {
1470                                 char tmp[TCP_CA_NAME_MAX];
1471
1472                                 nla_strlcpy(tmp, nla, sizeof(tmp));
1473                                 val = tcp_ca_get_key_by_name(tmp);
1474                                 if (val == TCP_CA_UNSPEC)
1475                                         goto err;
1476                         } else {
1477                                 val = nla_get_u32(nla);
1478                         }
1479
1480                         mp[type - 1] = val;
1481                         __set_bit(type - 1, mxc->mx_valid);
1482                 }
1483         }
1484
1485         mxc->mx = mp;
1486
1487         return 0;
1488  err:
1489         kfree(mp);
1490         return -EINVAL;
1491 }
1492
1493 int ip6_route_add(struct fib6_config *cfg)
1494 {
1495         int err;
1496         struct net *net = cfg->fc_nlinfo.nl_net;
1497         struct rt6_info *rt = NULL;
1498         struct net_device *dev = NULL;
1499         struct inet6_dev *idev = NULL;
1500         struct fib6_table *table;
1501         struct mx6_config mxc = { .mx = NULL, };
1502         int addr_type;
1503
1504         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1505                 return -EINVAL;
1506 #ifndef CONFIG_IPV6_SUBTREES
1507         if (cfg->fc_src_len)
1508                 return -EINVAL;
1509 #endif
1510         if (cfg->fc_ifindex) {
1511                 err = -ENODEV;
1512                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1513                 if (!dev)
1514                         goto out;
1515                 idev = in6_dev_get(dev);
1516                 if (!idev)
1517                         goto out;
1518         }
1519
1520         if (cfg->fc_metric == 0)
1521                 cfg->fc_metric = IP6_RT_PRIO_USER;
1522
1523         err = -ENOBUFS;
1524         if (cfg->fc_nlinfo.nlh &&
1525             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1526                 table = fib6_get_table(net, cfg->fc_table);
1527                 if (!table) {
1528                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1529                         table = fib6_new_table(net, cfg->fc_table);
1530                 }
1531         } else {
1532                 table = fib6_new_table(net, cfg->fc_table);
1533         }
1534
1535         if (!table)
1536                 goto out;
1537
1538         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1539
1540         if (!rt) {
1541                 err = -ENOMEM;
1542                 goto out;
1543         }
1544
1545         if (cfg->fc_flags & RTF_EXPIRES)
1546                 rt6_set_expires(rt, jiffies +
1547                                 clock_t_to_jiffies(cfg->fc_expires));
1548         else
1549                 rt6_clean_expires(rt);
1550
1551         if (cfg->fc_protocol == RTPROT_UNSPEC)
1552                 cfg->fc_protocol = RTPROT_BOOT;
1553         rt->rt6i_protocol = cfg->fc_protocol;
1554
1555         addr_type = ipv6_addr_type(&cfg->fc_dst);
1556
1557         if (addr_type & IPV6_ADDR_MULTICAST)
1558                 rt->dst.input = ip6_mc_input;
1559         else if (cfg->fc_flags & RTF_LOCAL)
1560                 rt->dst.input = ip6_input;
1561         else
1562                 rt->dst.input = ip6_forward;
1563
1564         rt->dst.output = ip6_output;
1565
1566         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1567         rt->rt6i_dst.plen = cfg->fc_dst_len;
1568         if (rt->rt6i_dst.plen == 128)
1569                 rt->dst.flags |= DST_HOST;
1570
1571 #ifdef CONFIG_IPV6_SUBTREES
1572         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1573         rt->rt6i_src.plen = cfg->fc_src_len;
1574 #endif
1575
1576         rt->rt6i_metric = cfg->fc_metric;
1577
1578         /* We cannot add true routes via loopback here,
1579            they would result in kernel looping; promote them to reject routes
1580          */
1581         if ((cfg->fc_flags & RTF_REJECT) ||
1582             (dev && (dev->flags & IFF_LOOPBACK) &&
1583              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1584              !(cfg->fc_flags & RTF_LOCAL))) {
1585                 /* hold loopback dev/idev if we haven't done so. */
1586                 if (dev != net->loopback_dev) {
1587                         if (dev) {
1588                                 dev_put(dev);
1589                                 in6_dev_put(idev);
1590                         }
1591                         dev = net->loopback_dev;
1592                         dev_hold(dev);
1593                         idev = in6_dev_get(dev);
1594                         if (!idev) {
1595                                 err = -ENODEV;
1596                                 goto out;
1597                         }
1598                 }
1599                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1600                 switch (cfg->fc_type) {
1601                 case RTN_BLACKHOLE:
1602                         rt->dst.error = -EINVAL;
1603                         rt->dst.output = dst_discard_sk;
1604                         rt->dst.input = dst_discard;
1605                         break;
1606                 case RTN_PROHIBIT:
1607                         rt->dst.error = -EACCES;
1608                         rt->dst.output = ip6_pkt_prohibit_out;
1609                         rt->dst.input = ip6_pkt_prohibit;
1610                         break;
1611                 case RTN_THROW:
1612                 default:
1613                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1614                                         : -ENETUNREACH;
1615                         rt->dst.output = ip6_pkt_discard_out;
1616                         rt->dst.input = ip6_pkt_discard;
1617                         break;
1618                 }
1619                 goto install_route;
1620         }
1621
1622         if (cfg->fc_flags & RTF_GATEWAY) {
1623                 const struct in6_addr *gw_addr;
1624                 int gwa_type;
1625
1626                 gw_addr = &cfg->fc_gateway;
1627
1628                 /* if gw_addr is local we will fail to detect this in case
1629                  * address is still TENTATIVE (DAD in progress). rt6_lookup()
1630                  * will return already-added prefix route via interface that
1631                  * prefix route was assigned to, which might be non-loopback.
1632                  */
1633                 err = -EINVAL;
1634                 if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
1635                         goto out;
1636
1637                 rt->rt6i_gateway = *gw_addr;
1638                 gwa_type = ipv6_addr_type(gw_addr);
1639
1640                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1641                         struct rt6_info *grt;
1642
1643                         /* IPv6 strictly inhibits using not link-local
1644                            addresses as nexthop address.
1645                            Otherwise, router will not able to send redirects.
1646                            It is very good, but in some (rare!) circumstances
1647                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1648                            some exceptions. --ANK
1649                          */
1650                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1651                                 goto out;
1652
1653                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1654
1655                         err = -EHOSTUNREACH;
1656                         if (!grt)
1657                                 goto out;
1658                         if (dev) {
1659                                 if (dev != grt->dst.dev) {
1660                                         ip6_rt_put(grt);
1661                                         goto out;
1662                                 }
1663                         } else {
1664                                 dev = grt->dst.dev;
1665                                 idev = grt->rt6i_idev;
1666                                 dev_hold(dev);
1667                                 in6_dev_hold(grt->rt6i_idev);
1668                         }
1669                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1670                                 err = 0;
1671                         ip6_rt_put(grt);
1672
1673                         if (err)
1674                                 goto out;
1675                 }
1676                 err = -EINVAL;
1677                 if (!dev || (dev->flags & IFF_LOOPBACK))
1678                         goto out;
1679         }
1680
1681         err = -ENODEV;
1682         if (!dev)
1683                 goto out;
1684
1685         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1686                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1687                         err = -EINVAL;
1688                         goto out;
1689                 }
1690                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1691                 rt->rt6i_prefsrc.plen = 128;
1692         } else
1693                 rt->rt6i_prefsrc.plen = 0;
1694
1695         rt->rt6i_flags = cfg->fc_flags;
1696
1697 install_route:
1698         rt->dst.dev = dev;
1699         rt->rt6i_idev = idev;
1700         rt->rt6i_table = table;
1701
1702         cfg->fc_nlinfo.nl_net = dev_net(dev);
1703
1704         err = ip6_convert_metrics(&mxc, cfg);
1705         if (err)
1706                 goto out;
1707
1708         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1709
1710         kfree(mxc.mx);
1711         return err;
1712 out:
1713         if (dev)
1714                 dev_put(dev);
1715         if (idev)
1716                 in6_dev_put(idev);
1717         if (rt)
1718                 dst_free(&rt->dst);
1719         return err;
1720 }
1721
1722 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1723 {
1724         int err;
1725         struct fib6_table *table;
1726         struct net *net = dev_net(rt->dst.dev);
1727
1728         if (rt == net->ipv6.ip6_null_entry) {
1729                 err = -ENOENT;
1730                 goto out;
1731         }
1732
1733         table = rt->rt6i_table;
1734         write_lock_bh(&table->tb6_lock);
1735         err = fib6_del(rt, info);
1736         write_unlock_bh(&table->tb6_lock);
1737
1738 out:
1739         ip6_rt_put(rt);
1740         return err;
1741 }
1742
1743 int ip6_del_rt(struct rt6_info *rt)
1744 {
1745         struct nl_info info = {
1746                 .nl_net = dev_net(rt->dst.dev),
1747         };
1748         return __ip6_del_rt(rt, &info);
1749 }
1750
1751 static int ip6_route_del(struct fib6_config *cfg)
1752 {
1753         struct fib6_table *table;
1754         struct fib6_node *fn;
1755         struct rt6_info *rt;
1756         int err = -ESRCH;
1757
1758         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1759         if (!table)
1760                 return err;
1761
1762         read_lock_bh(&table->tb6_lock);
1763
1764         fn = fib6_locate(&table->tb6_root,
1765                          &cfg->fc_dst, cfg->fc_dst_len,
1766                          &cfg->fc_src, cfg->fc_src_len);
1767
1768         if (fn) {
1769                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1770                         if ((rt->rt6i_flags & RTF_CACHE) &&
1771                             !(cfg->fc_flags & RTF_CACHE))
1772                                 continue;
1773                         if (cfg->fc_ifindex &&
1774                             (!rt->dst.dev ||
1775                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1776                                 continue;
1777                         if (cfg->fc_flags & RTF_GATEWAY &&
1778                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1779                                 continue;
1780                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1781                                 continue;
1782                         dst_hold(&rt->dst);
1783                         read_unlock_bh(&table->tb6_lock);
1784
1785                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1786                 }
1787         }
1788         read_unlock_bh(&table->tb6_lock);
1789
1790         return err;
1791 }
1792
/*
 * Process a received ICMPv6 Redirect carried in @skb for the route @dst.
 *
 * The message is dropped (with a rate-limited debug message in most cases)
 * when it is too short, its destination is multicast, its target is neither
 * equal to the destination nor a link-local unicast address, the receiving
 * interface has no inet6_dev, forwards packets or has accept_redirects
 * cleared, its ND options are invalid, or @dst is the null entry.
 *
 * Otherwise the neighbour entry for the target is updated, a host route
 * (plen 128) to the redirected destination is copied from @dst and
 * inserted, NETEVENT_REDIRECT notifiers are called, and @dst itself is
 * deleted if it was a cached route.
 *
 * @sk is not used by this function.
 */
1793 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1794 {
1795         struct net *net = dev_net(skb->dev);
1796         struct netevent_redirect netevent;
1797         struct rt6_info *rt, *nrt = NULL;
1798         struct ndisc_options ndopts;
1799         struct inet6_dev *in6_dev;
1800         struct neighbour *neigh;
1801         struct rd_msg *msg;
1802         int optlen, on_link;
1803         u8 *lladdr;
1804
/* Bytes that follow the fixed rd_msg header, i.e. the ND options area. */
1805         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1806         optlen -= sizeof(*msg);
1807
1808         if (optlen < 0) {
1809                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1810                 return;
1811         }
1812
1813         msg = (struct rd_msg *)icmp6_hdr(skb);
1814
1815         if (ipv6_addr_is_multicast(&msg->dest)) {
1816                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1817                 return;
1818         }
1819
/* target == dest: the destination itself is the next hop (no gateway). */
1820         on_link = 0;
1821         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1822                 on_link = 1;
1823         } else if (ipv6_addr_type(&msg->target) !=
1824                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1825                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1826                 return;
1827         }
1828
1829         in6_dev = __in6_dev_get(skb->dev);
1830         if (!in6_dev)
1831                 return;
/* Redirects are ignored on forwarding interfaces or when disabled. */
1832         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1833                 return;
1834
1835         /* RFC2461 8.1:
1836          *      The IP source address of the Redirect MUST be the same as the current
1837          *      first-hop router for the specified ICMP Destination Address.
1838          */
1839
1840         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1841                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1842                 return;
1843         }
1844
/* The target link-layer address option is optional; lladdr stays NULL without it. */
1845         lladdr = NULL;
1846         if (ndopts.nd_opts_tgt_lladdr) {
1847                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1848                                              skb->dev);
1849                 if (!lladdr) {
1850                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1851                         return;
1852                 }
1853         }
1854
1855         rt = (struct rt6_info *) dst;
1856         if (rt == net->ipv6.ip6_null_entry) {
1857                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1858                 return;
1859         }
1860
1861         /* Redirect received -> path was valid.
1862          * Look, redirects are sent only in response to data packets,
1863          * so that this nexthop apparently is reachable. --ANK
1864          */
1865         dst_confirm(&rt->dst);
1866
/* NOTE(review): the final argument 1 presumably requests creation of a
 * missing entry, and the returned neighbour presumably carries a reference
 * that neigh_release() at "out" drops -- confirm against __neigh_lookup().
 */
1867         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1868         if (!neigh)
1869                 return;
1870
1871         /*
1872          *      We have finally decided to accept it.
1873          */
1874
/* The router flags are only forced when the target is a gateway (not on-link). */
1875         neigh_update(neigh, lladdr, NUD_STALE,
1876                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1877                      NEIGH_UPDATE_F_OVERRIDE|
1878                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1879                                      NEIGH_UPDATE_F_ISROUTER))
1880                      );
1881
1882         nrt = ip6_rt_copy(rt, &msg->dest);
1883         if (!nrt)
1884                 goto out;
1885
1886         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1887         if (on_link)
1888                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1889
/* The new next hop is the redirect target, taken from the neighbour key. */
1890         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1891
1892         if (ip6_ins_rt(nrt))
1893                 goto out;
1894
1895         netevent.old = &rt->dst;
1896         netevent.new = &nrt->dst;
1897         netevent.daddr = &msg->dest;
1898         netevent.neigh = neigh;
1899         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1900
/* A cached old route is removed; dst_clone() supplies the reference that
 * ip6_del_rt() releases.
 */
1901         if (rt->rt6i_flags & RTF_CACHE) {
1902                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1903                 ip6_del_rt(rt);
1904         }
1905
1906 out:
1907         neigh_release(neigh);
1908 }
1909
1910 /*
1911  *      Misc support functions
1912  */
1913
1914 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1915 {
1916         BUG_ON(from->dst.from);
1917
1918         rt->rt6i_flags &= ~RTF_EXPIRES;
1919         dst_hold(&from->dst);
1920         rt->dst.from = &from->dst;
1921         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1922 }
1923
1924 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1925                                     const struct in6_addr *dest)
1926 {
1927         struct net *net = dev_net(ort->dst.dev);
1928         struct rt6_info *rt;
1929
1930         if (ort->rt6i_flags & RTF_CACHE)
1931                 ort = (struct rt6_info *)ort->dst.from;
1932
1933         rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1934                            ort->rt6i_table);
1935
1936         if (rt) {
1937                 rt->dst.input = ort->dst.input;
1938                 rt->dst.output = ort->dst.output;
1939                 rt->dst.flags |= DST_HOST;
1940
1941                 rt->rt6i_dst.addr = *dest;
1942                 rt->rt6i_dst.plen = 128;
1943                 rt->dst.error = ort->dst.error;
1944                 rt->rt6i_idev = ort->rt6i_idev;
1945                 if (rt->rt6i_idev)
1946                         in6_dev_hold(rt->rt6i_idev);
1947                 rt->dst.lastuse = jiffies;
1948
1949                 if (ort->rt6i_flags & RTF_GATEWAY)
1950                         rt->rt6i_gateway = ort->rt6i_gateway;
1951                 else
1952                         rt->rt6i_gateway = *dest;
1953                 rt->rt6i_flags = ort->rt6i_flags;
1954                 rt6_set_from(rt, ort);
1955                 rt->rt6i_metric = 0;
1956
1957 #ifdef CONFIG_IPV6_SUBTREES
1958                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1959 #endif
1960                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1961                 rt->rt6i_table = ort->rt6i_table;
1962         }
1963         return rt;
1964 }
1965
1966 #ifdef CONFIG_IPV6_ROUTE_INFO
1967 static struct rt6_info *rt6_get_route_info(struct net *net,
1968                                            const struct in6_addr *prefix, int prefixlen,
1969                                            const struct in6_addr *gwaddr, int ifindex)
1970 {
1971         struct fib6_node *fn;
1972         struct rt6_info *rt = NULL;
1973         struct fib6_table *table;
1974
1975         table = fib6_get_table(net, RT6_TABLE_INFO);
1976         if (!table)
1977                 return NULL;
1978
1979         read_lock_bh(&table->tb6_lock);
1980         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1981         if (!fn)
1982                 goto out;
1983
1984         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1985                 if (rt->dst.dev->ifindex != ifindex)
1986                         continue;
1987                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1988                         continue;
1989                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1990                         continue;
1991                 dst_hold(&rt->dst);
1992                 break;
1993         }
1994 out:
1995         read_unlock_bh(&table->tb6_lock);
1996         return rt;
1997 }
1998
1999 static struct rt6_info *rt6_add_route_info(struct net *net,
2000                                            const struct in6_addr *prefix, int prefixlen,
2001                                            const struct in6_addr *gwaddr, int ifindex,
2002                                            unsigned int pref)
2003 {
2004         struct fib6_config cfg = {
2005                 .fc_table       = RT6_TABLE_INFO,
2006                 .fc_metric      = IP6_RT_PRIO_USER,
2007                 .fc_ifindex     = ifindex,
2008                 .fc_dst_len     = prefixlen,
2009                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2010                                   RTF_UP | RTF_PREF(pref),
2011                 .fc_nlinfo.portid = 0,
2012                 .fc_nlinfo.nlh = NULL,
2013                 .fc_nlinfo.nl_net = net,
2014         };
2015
2016         cfg.fc_dst = *prefix;
2017         cfg.fc_gateway = *gwaddr;
2018
2019         /* We should treat it as a default route if prefix length is 0. */
2020         if (!prefixlen)
2021                 cfg.fc_flags |= RTF_DEFAULT;
2022
2023         ip6_route_add(&cfg);
2024
2025         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2026 }
2027 #endif
2028
2029 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2030 {
2031         struct rt6_info *rt;
2032         struct fib6_table *table;
2033
2034         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2035         if (!table)
2036                 return NULL;
2037
2038         read_lock_bh(&table->tb6_lock);
2039         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2040                 if (dev == rt->dst.dev &&
2041                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2042                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2043                         break;
2044         }
2045         if (rt)
2046                 dst_hold(&rt->dst);
2047         read_unlock_bh(&table->tb6_lock);
2048         return rt;
2049 }
2050
2051 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2052                                      struct net_device *dev,
2053                                      unsigned int pref)
2054 {
2055         struct fib6_config cfg = {
2056                 .fc_table       = RT6_TABLE_DFLT,
2057                 .fc_metric      = IP6_RT_PRIO_USER,
2058                 .fc_ifindex     = dev->ifindex,
2059                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2060                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2061                 .fc_nlinfo.portid = 0,
2062                 .fc_nlinfo.nlh = NULL,
2063                 .fc_nlinfo.nl_net = dev_net(dev),
2064         };
2065
2066         cfg.fc_gateway = *gwaddr;
2067
2068         ip6_route_add(&cfg);
2069
2070         return rt6_get_dflt_router(gwaddr, dev);
2071 }
2072
2073 void rt6_purge_dflt_routers(struct net *net)
2074 {
2075         struct rt6_info *rt;
2076         struct fib6_table *table;
2077
2078         /* NOTE: Keep consistent with rt6_get_dflt_router */
2079         table = fib6_get_table(net, RT6_TABLE_DFLT);
2080         if (!table)
2081                 return;
2082
2083 restart:
2084         read_lock_bh(&table->tb6_lock);
2085         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2086                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2087                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2088                         dst_hold(&rt->dst);
2089                         read_unlock_bh(&table->tb6_lock);
2090                         ip6_del_rt(rt);
2091                         goto restart;
2092                 }
2093         }
2094         read_unlock_bh(&table->tb6_lock);
2095 }
2096
2097 static void rtmsg_to_fib6_config(struct net *net,
2098                                  struct in6_rtmsg *rtmsg,
2099                                  struct fib6_config *cfg)
2100 {
2101         memset(cfg, 0, sizeof(*cfg));
2102
2103         cfg->fc_table = RT6_TABLE_MAIN;
2104         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2105         cfg->fc_metric = rtmsg->rtmsg_metric;
2106         cfg->fc_expires = rtmsg->rtmsg_info;
2107         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2108         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2109         cfg->fc_flags = rtmsg->rtmsg_flags;
2110
2111         cfg->fc_nlinfo.nl_net = net;
2112
2113         cfg->fc_dst = rtmsg->rtmsg_dst;
2114         cfg->fc_src = rtmsg->rtmsg_src;
2115         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2116 }
2117
2118 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2119 {
2120         struct fib6_config cfg;
2121         struct in6_rtmsg rtmsg;
2122         int err;
2123
2124         switch (cmd) {
2125         case SIOCADDRT:         /* Add a route */
2126         case SIOCDELRT:         /* Delete a route */
2127                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2128                         return -EPERM;
2129                 err = copy_from_user(&rtmsg, arg,
2130                                      sizeof(struct in6_rtmsg));
2131                 if (err)
2132                         return -EFAULT;
2133
2134                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2135
2136                 rtnl_lock();
2137                 switch (cmd) {
2138                 case SIOCADDRT:
2139                         err = ip6_route_add(&cfg);
2140                         break;
2141                 case SIOCDELRT:
2142                         err = ip6_route_del(&cfg);
2143                         break;
2144                 default:
2145                         err = -EINVAL;
2146                 }
2147                 rtnl_unlock();
2148
2149                 return err;
2150         }
2151
2152         return -EINVAL;
2153 }
2154
2155 /*
2156  *      Drop the packet on the floor
2157  */
2158
2159 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2160 {
2161         int type;
2162         struct dst_entry *dst = skb_dst(skb);
2163         switch (ipstats_mib_noroutes) {
2164         case IPSTATS_MIB_INNOROUTES:
2165                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2166                 if (type == IPV6_ADDR_ANY) {
2167                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2168                                       IPSTATS_MIB_INADDRERRORS);
2169                         break;
2170                 }
2171                 /* FALLTHROUGH */
2172         case IPSTATS_MIB_OUTNOROUTES:
2173                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2174                               ipstats_mib_noroutes);
2175                 break;
2176         }
2177         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2178         kfree_skb(skb);
2179         return 0;
2180 }
2181
2182 static int ip6_pkt_discard(struct sk_buff *skb)
2183 {
2184         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2185 }
2186
2187 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2188 {
2189         skb->dev = skb_dst(skb)->dev;
2190         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2191 }
2192
2193 static int ip6_pkt_prohibit(struct sk_buff *skb)
2194 {
2195         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2196 }
2197
2198 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2199 {
2200         skb->dev = skb_dst(skb)->dev;
2201         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2202 }
2203
2204 /*
2205  *      Allocate a dst for local (unicast / anycast) address.
2206  */
2207
2208 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2209                                     const struct in6_addr *addr,
2210                                     bool anycast)
2211 {
2212         struct net *net = dev_net(idev->dev);
2213         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2214                                             DST_NOCOUNT, NULL);
2215         if (!rt)
2216                 return ERR_PTR(-ENOMEM);
2217
2218         in6_dev_hold(idev);
2219
2220         rt->dst.flags |= DST_HOST;
2221         rt->dst.input = ip6_input;
2222         rt->dst.output = ip6_output;
2223         rt->rt6i_idev = idev;
2224
2225         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2226         if (anycast)
2227                 rt->rt6i_flags |= RTF_ANYCAST;
2228         else
2229                 rt->rt6i_flags |= RTF_LOCAL;
2230
2231         rt->rt6i_gateway  = *addr;
2232         rt->rt6i_dst.addr = *addr;
2233         rt->rt6i_dst.plen = 128;
2234         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2235
2236         atomic_set(&rt->dst.__refcnt, 1);
2237
2238         return rt;
2239 }
2240
2241 int ip6_route_get_saddr(struct net *net,
2242                         struct rt6_info *rt,
2243                         const struct in6_addr *daddr,
2244                         unsigned int prefs,
2245                         struct in6_addr *saddr)
2246 {
2247         struct inet6_dev *idev =
2248                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2249         int err = 0;
2250         if (rt && rt->rt6i_prefsrc.plen)
2251                 *saddr = rt->rt6i_prefsrc.addr;
2252         else
2253                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2254                                          daddr, prefs, saddr);
2255         return err;
2256 }
2257
2258 /* Remove a deleted IP address from routes' prefsrc entries.
 *
 * Argument bundle that rt6_remove_prefsrc() hands to the
 * fib6_remove_prefsrc() callback through fib6_clean_all().
 */
2259 struct arg_dev_net_ip {
2260         struct net_device *dev; /* device to match; NULL matches every device */
2261         struct net *net;        /* namespace whose null entry is skipped */
2262         struct in6_addr *addr;  /* address compared against rt6i_prefsrc.addr */
2263 };
2264
2265 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2266 {
2267         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2268         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2269         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2270
2271         if (((void *)rt->dst.dev == dev || !dev) &&
2272             rt != net->ipv6.ip6_null_entry &&
2273             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2274                 /* remove prefsrc entry */
2275                 rt->rt6i_prefsrc.plen = 0;
2276         }
2277         return 0;
2278 }
2279
2280 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2281 {
2282         struct net *net = dev_net(ifp->idev->dev);
2283         struct arg_dev_net_ip adni = {
2284                 .dev = ifp->idev->dev,
2285                 .net = net,
2286                 .addr = &ifp->addr,
2287         };
2288         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2289 }
2290
/* Flag combinations tested by fib6_clean_tohost(): an addrconf default
 * route through a gateway, and a cached route through a gateway.  A route
 * matches a combination only when all of its flags are set.
 */
2291 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2292 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2293
2294 /* Remove routers and update dst entries when gateway turn into host. */
2295 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2296 {
2297         struct in6_addr *gateway = (struct in6_addr *)arg;
2298
2299         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2300              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2301              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2302                 return -1;
2303         }
2304         return 0;
2305 }
2306
2307 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2308 {
2309         fib6_clean_all(net, fib6_clean_tohost, gateway);
2310 }
2311
/* Argument bundle that rt6_ifdown() passes to the fib6_ifdown() callback. */
2312 struct arg_dev_net {
2313         struct net_device *dev; /* device going down; NULL matches every device */
2314         struct net *net;        /* namespace whose null entry is never selected */
2315 };
2316
2317 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2318 {
2319         const struct arg_dev_net *adn = arg;
2320         const struct net_device *dev = adn->dev;
2321
2322         if ((rt->dst.dev == dev || !dev) &&
2323             rt != adn->net->ipv6.ip6_null_entry)
2324                 return -1;
2325
2326         return 0;
2327 }
2328
2329 void rt6_ifdown(struct net *net, struct net_device *dev)
2330 {
2331         struct arg_dev_net adn = {
2332                 .dev = dev,
2333                 .net = net,
2334         };
2335
2336         fib6_clean_all(net, fib6_ifdown, &adn);
2337         icmp6_clean_all(fib6_ifdown, &adn);
2338 }
2339
/* Argument bundle that rt6_mtu_change() passes to rt6_mtu_change_route(). */
2340 struct rt6_mtu_change_arg {
2341         struct net_device *dev; /* device whose MTU changed */
2342         unsigned int mtu;       /* the new MTU value */
2343 };
2344
2345 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2346 {
2347         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2348         struct inet6_dev *idev;
2349
2350         /* In IPv6 pmtu discovery is not optional,
2351            so that RTAX_MTU lock cannot disable it.
2352            We still use this lock to block changes
2353            caused by addrconf/ndisc.
2354         */
2355
2356         idev = __in6_dev_get(arg->dev);
2357         if (!idev)
2358                 return 0;
2359
2360         /* For administrative MTU increase, there is no way to discover
2361            IPv6 PMTU increase, so PMTU increase should be updated here.
2362            Since RFC 1981 doesn't include administrative MTU increase
2363            update PMTU increase is a MUST. (i.e. jumbo frame)
2364          */
2365         /*
2366            If new MTU is less than route PMTU, this new MTU will be the
2367            lowest MTU in the path, update the route PMTU to reflect PMTU
2368            decreases; if new MTU is greater than route PMTU, and the
2369            old MTU is the lowest MTU in the path, update the route PMTU
2370            to reflect the increase. In this case if the other nodes' MTU
2371            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2372            PMTU discouvery.
2373          */
2374         if (rt->dst.dev == arg->dev &&
2375             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2376                 if (rt->rt6i_flags & RTF_CACHE) {
2377                         /* For RTF_CACHE with rt6i_pmtu == 0
2378                          * (i.e. a redirected route),
2379                          * the metrics of its rt->dst.from has already
2380                          * been updated.
2381                          */
2382                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2383                                 rt->rt6i_pmtu = arg->mtu;
2384                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2385                            (dst_mtu(&rt->dst) < arg->mtu &&
2386                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2387                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2388                 }
2389         }
2390         return 0;
2391 }
2392
2393 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2394 {
2395         struct rt6_mtu_change_arg arg = {
2396                 .dev = dev,
2397                 .mtu = mtu,
2398         };
2399
2400         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2401 }
2402
2403 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2404         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2405         [RTA_OIF]               = { .type = NLA_U32 },
2406         [RTA_IIF]               = { .type = NLA_U32 },
2407         [RTA_PRIORITY]          = { .type = NLA_U32 },
2408         [RTA_METRICS]           = { .type = NLA_NESTED },
2409         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2410         [RTA_PREF]              = { .type = NLA_U8 },
2411 };
2412
2413 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2414                               struct fib6_config *cfg)
2415 {
2416         struct rtmsg *rtm;
2417         struct nlattr *tb[RTA_MAX+1];
2418         unsigned int pref;
2419         int err;
2420
2421         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2422         if (err < 0)
2423                 goto errout;
2424
2425         err = -EINVAL;
2426         rtm = nlmsg_data(nlh);
2427         memset(cfg, 0, sizeof(*cfg));
2428
2429         cfg->fc_table = rtm->rtm_table;
2430         cfg->fc_dst_len = rtm->rtm_dst_len;
2431         cfg->fc_src_len = rtm->rtm_src_len;
2432         cfg->fc_flags = RTF_UP;
2433         cfg->fc_protocol = rtm->rtm_protocol;
2434         cfg->fc_type = rtm->rtm_type;
2435
2436         if (rtm->rtm_type == RTN_UNREACHABLE ||
2437             rtm->rtm_type == RTN_BLACKHOLE ||
2438             rtm->rtm_type == RTN_PROHIBIT ||
2439             rtm->rtm_type == RTN_THROW)
2440                 cfg->fc_flags |= RTF_REJECT;
2441
2442         if (rtm->rtm_type == RTN_LOCAL)
2443                 cfg->fc_flags |= RTF_LOCAL;
2444
2445         if (rtm->rtm_flags & RTM_F_CLONED)
2446                 cfg->fc_flags |= RTF_CACHE;
2447
2448         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2449         cfg->fc_nlinfo.nlh = nlh;
2450         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2451
2452         if (tb[RTA_GATEWAY]) {
2453                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2454                 cfg->fc_flags |= RTF_GATEWAY;
2455         }
2456
2457         if (tb[RTA_DST]) {
2458                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2459
2460                 if (nla_len(tb[RTA_DST]) < plen)
2461                         goto errout;
2462
2463                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2464         }
2465
2466         if (tb[RTA_SRC]) {
2467                 int plen = (rtm->rtm_src_len + 7) >> 3;
2468
2469                 if (nla_len(tb[RTA_SRC]) < plen)
2470                         goto errout;
2471
2472                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2473         }
2474
2475         if (tb[RTA_PREFSRC])
2476                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2477
2478         if (tb[RTA_OIF])
2479                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2480
2481         if (tb[RTA_PRIORITY])
2482                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2483
2484         if (tb[RTA_METRICS]) {
2485                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2486                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2487         }
2488
2489         if (tb[RTA_TABLE])
2490                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2491
2492         if (tb[RTA_MULTIPATH]) {
2493                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2494                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2495         }
2496
2497         if (tb[RTA_PREF]) {
2498                 pref = nla_get_u8(tb[RTA_PREF]);
2499                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2500                     pref != ICMPV6_ROUTER_PREF_HIGH)
2501                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2502                 cfg->fc_flags |= RTF_PREF(pref);
2503         }
2504
2505         err = 0;
2506 errout:
2507         return err;
2508 }
2509
2510 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2511 {
2512         struct fib6_config r_cfg;
2513         struct rtnexthop *rtnh;
2514         int remaining;
2515         int attrlen;
2516         int err = 0, last_err = 0;
2517
2518         remaining = cfg->fc_mp_len;
2519 beginning:
2520         rtnh = (struct rtnexthop *)cfg->fc_mp;
2521
2522         /* Parse a Multipath Entry */
2523         while (rtnh_ok(rtnh, remaining)) {
2524                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2525                 if (rtnh->rtnh_ifindex)
2526                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2527
2528                 attrlen = rtnh_attrlen(rtnh);
2529                 if (attrlen > 0) {
2530                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2531
2532                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2533                         if (nla) {
2534                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2535                                 r_cfg.fc_flags |= RTF_GATEWAY;
2536                         }
2537                 }
2538                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2539                 if (err) {
2540                         last_err = err;
2541                         /* If we are trying to remove a route, do not stop the
2542                          * loop when ip6_route_del() fails (because next hop is
2543                          * already gone), we should try to remove all next hops.
2544                          */
2545                         if (add) {
2546                                 /* If add fails, we should try to delete all
2547                                  * next hops that have been already added.
2548                                  */
2549                                 add = 0;
2550                                 remaining = cfg->fc_mp_len - remaining;
2551                                 goto beginning;
2552                         }
2553                 }
2554                 /* Because each route is added like a single route we remove
2555                  * these flags after the first nexthop: if there is a collision,
2556                  * we have already failed to add the first nexthop:
2557                  * fib6_add_rt2node() has rejected it; when replacing, old
2558                  * nexthops have been replaced by first new, the rest should
2559                  * be added to it.
2560                  */
2561                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2562                                                      NLM_F_REPLACE);
2563                 rtnh = rtnh_next(rtnh, &remaining);
2564         }
2565
2566         return last_err;
2567 }
2568
2569 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2570 {
2571         struct fib6_config cfg;
2572         int err;
2573
2574         err = rtm_to_fib6_config(skb, nlh, &cfg);
2575         if (err < 0)
2576                 return err;
2577
2578         if (cfg.fc_mp)
2579                 return ip6_route_multipath(&cfg, 0);
2580         else
2581                 return ip6_route_del(&cfg);
2582 }
2583
2584 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2585 {
2586         struct fib6_config cfg;
2587         int err;
2588
2589         err = rtm_to_fib6_config(skb, nlh, &cfg);
2590         if (err < 0)
2591                 return err;
2592
2593         if (cfg.fc_mp)
2594                 return ip6_route_multipath(&cfg, 1);
2595         else
2596                 return ip6_route_add(&cfg);
2597 }
2598
2599 static inline size_t rt6_nlmsg_size(void)
2600 {
2601         return NLMSG_ALIGN(sizeof(struct rtmsg))
2602                + nla_total_size(16) /* RTA_SRC */
2603                + nla_total_size(16) /* RTA_DST */
2604                + nla_total_size(16) /* RTA_GATEWAY */
2605                + nla_total_size(16) /* RTA_PREFSRC */
2606                + nla_total_size(4) /* RTA_TABLE */
2607                + nla_total_size(4) /* RTA_IIF */
2608                + nla_total_size(4) /* RTA_OIF */
2609                + nla_total_size(4) /* RTA_PRIORITY */
2610                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2611                + nla_total_size(sizeof(struct rta_cacheinfo))
2612                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2613                + nla_total_size(1); /* RTA_PREF */
2614 }
2615
2616 static int rt6_fill_node(struct net *net,
2617                          struct sk_buff *skb, struct rt6_info *rt,
2618                          struct in6_addr *dst, struct in6_addr *src,
2619                          int iif, int type, u32 portid, u32 seq,
2620                          int prefix, int nowait, unsigned int flags)
2621 {
2622         u32 metrics[RTAX_MAX];
2623         struct rtmsg *rtm;
2624         struct nlmsghdr *nlh;
2625         long expires;
2626         u32 table;
2627
2628         if (prefix) {   /* user wants prefix routes only */
2629                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2630                         /* success since this is not a prefix route */
2631                         return 1;
2632                 }
2633         }
2634
2635         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2636         if (!nlh)
2637                 return -EMSGSIZE;
2638
2639         rtm = nlmsg_data(nlh);
2640         rtm->rtm_family = AF_INET6;
2641         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2642         rtm->rtm_src_len = rt->rt6i_src.plen;
2643         rtm->rtm_tos = 0;
2644         if (rt->rt6i_table)
2645                 table = rt->rt6i_table->tb6_id;
2646         else
2647                 table = RT6_TABLE_UNSPEC;
2648         rtm->rtm_table = table;
2649         if (nla_put_u32(skb, RTA_TABLE, table))
2650                 goto nla_put_failure;
2651         if (rt->rt6i_flags & RTF_REJECT) {
2652                 switch (rt->dst.error) {
2653                 case -EINVAL:
2654                         rtm->rtm_type = RTN_BLACKHOLE;
2655                         break;
2656                 case -EACCES:
2657                         rtm->rtm_type = RTN_PROHIBIT;
2658                         break;
2659                 case -EAGAIN:
2660                         rtm->rtm_type = RTN_THROW;
2661                         break;
2662                 default:
2663                         rtm->rtm_type = RTN_UNREACHABLE;
2664                         break;
2665                 }
2666         }
2667         else if (rt->rt6i_flags & RTF_LOCAL)
2668                 rtm->rtm_type = RTN_LOCAL;
2669         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2670                 rtm->rtm_type = RTN_LOCAL;
2671         else
2672                 rtm->rtm_type = RTN_UNICAST;
2673         rtm->rtm_flags = 0;
2674         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2675         rtm->rtm_protocol = rt->rt6i_protocol;
2676         if (rt->rt6i_flags & RTF_DYNAMIC)
2677                 rtm->rtm_protocol = RTPROT_REDIRECT;
2678         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2679                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2680                         rtm->rtm_protocol = RTPROT_RA;
2681                 else
2682                         rtm->rtm_protocol = RTPROT_KERNEL;
2683         }
2684
2685         if (rt->rt6i_flags & RTF_CACHE)
2686                 rtm->rtm_flags |= RTM_F_CLONED;
2687
2688         if (dst) {
2689                 if (nla_put_in6_addr(skb, RTA_DST, dst))
2690                         goto nla_put_failure;
2691                 rtm->rtm_dst_len = 128;
2692         } else if (rtm->rtm_dst_len)
2693                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2694                         goto nla_put_failure;
2695 #ifdef CONFIG_IPV6_SUBTREES
2696         if (src) {
2697                 if (nla_put_in6_addr(skb, RTA_SRC, src))
2698                         goto nla_put_failure;
2699                 rtm->rtm_src_len = 128;
2700         } else if (rtm->rtm_src_len &&
2701                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2702                 goto nla_put_failure;
2703 #endif
2704         if (iif) {
2705 #ifdef CONFIG_IPV6_MROUTE
2706                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2707                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2708                         if (err <= 0) {
2709                                 if (!nowait) {
2710                                         if (err == 0)
2711                                                 return 0;
2712                                         goto nla_put_failure;
2713                                 } else {
2714                                         if (err == -EMSGSIZE)
2715                                                 goto nla_put_failure;
2716                                 }
2717                         }
2718                 } else
2719 #endif
2720                         if (nla_put_u32(skb, RTA_IIF, iif))
2721                                 goto nla_put_failure;
2722         } else if (dst) {
2723                 struct in6_addr saddr_buf;
2724                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2725                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2726                         goto nla_put_failure;
2727         }
2728
2729         if (rt->rt6i_prefsrc.plen) {
2730                 struct in6_addr saddr_buf;
2731                 saddr_buf = rt->rt6i_prefsrc.addr;
2732                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2733                         goto nla_put_failure;
2734         }
2735
2736         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2737         if (rt->rt6i_pmtu)
2738                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2739         if (rtnetlink_put_metrics(skb, metrics) < 0)
2740                 goto nla_put_failure;
2741
2742         if (rt->rt6i_flags & RTF_GATEWAY) {
2743                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2744                         goto nla_put_failure;
2745         }
2746
2747         if (rt->dst.dev &&
2748             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2749                 goto nla_put_failure;
2750         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2751                 goto nla_put_failure;
2752
2753         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2754
2755         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2756                 goto nla_put_failure;
2757
2758         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2759                 goto nla_put_failure;
2760
2761         nlmsg_end(skb, nlh);
2762         return 0;
2763
2764 nla_put_failure:
2765         nlmsg_cancel(skb, nlh);
2766         return -EMSGSIZE;
2767 }
2768
2769 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2770 {
2771         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2772         int prefix;
2773
2774         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2775                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2776                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2777         } else
2778                 prefix = 0;
2779
2780         return rt6_fill_node(arg->net,
2781                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2782                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2783                      prefix, 0, NLM_F_MULTI);
2784 }
2785
2786 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2787 {
2788         struct net *net = sock_net(in_skb->sk);
2789         struct nlattr *tb[RTA_MAX+1];
2790         struct rt6_info *rt;
2791         struct sk_buff *skb;
2792         struct rtmsg *rtm;
2793         struct flowi6 fl6;
2794         int err, iif = 0, oif = 0;
2795
2796         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2797         if (err < 0)
2798                 goto errout;
2799
2800         err = -EINVAL;
2801         memset(&fl6, 0, sizeof(fl6));
2802
2803         if (tb[RTA_SRC]) {
2804                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2805                         goto errout;
2806
2807                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2808         }
2809
2810         if (tb[RTA_DST]) {
2811                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2812                         goto errout;
2813
2814                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2815         }
2816
2817         if (tb[RTA_IIF])
2818                 iif = nla_get_u32(tb[RTA_IIF]);
2819
2820         if (tb[RTA_OIF])
2821                 oif = nla_get_u32(tb[RTA_OIF]);
2822
2823         if (tb[RTA_MARK])
2824                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2825
2826         if (iif) {
2827                 struct net_device *dev;
2828                 int flags = 0;
2829
2830                 dev = __dev_get_by_index(net, iif);
2831                 if (!dev) {
2832                         err = -ENODEV;
2833                         goto errout;
2834                 }
2835
2836                 fl6.flowi6_iif = iif;
2837
2838                 if (!ipv6_addr_any(&fl6.saddr))
2839                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2840
2841                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2842                                                                flags);
2843         } else {
2844                 fl6.flowi6_oif = oif;
2845
2846                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2847         }
2848
2849         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2850         if (!skb) {
2851                 ip6_rt_put(rt);
2852                 err = -ENOBUFS;
2853                 goto errout;
2854         }
2855
2856         /* Reserve room for dummy headers, this skb can pass
2857            through good chunk of routing engine.
2858          */
2859         skb_reset_mac_header(skb);
2860         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2861
2862         skb_dst_set(skb, &rt->dst);
2863
2864         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2865                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2866                             nlh->nlmsg_seq, 0, 0, 0);
2867         if (err < 0) {
2868                 kfree_skb(skb);
2869                 goto errout;
2870         }
2871
2872         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2873 errout:
2874         return err;
2875 }
2876
2877 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2878 {
2879         struct sk_buff *skb;
2880         struct net *net = info->nl_net;
2881         u32 seq;
2882         int err;
2883
2884         err = -ENOBUFS;
2885         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2886
2887         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2888         if (!skb)
2889                 goto errout;
2890
2891         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2892                                 event, info->portid, seq, 0, 0, 0);
2893         if (err < 0) {
2894                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2895                 WARN_ON(err == -EMSGSIZE);
2896                 kfree_skb(skb);
2897                 goto errout;
2898         }
2899         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2900                     info->nlh, gfp_any());
2901         return;
2902 errout:
2903         if (err < 0)
2904                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2905 }
2906
2907 static int ip6_route_dev_notify(struct notifier_block *this,
2908                                 unsigned long event, void *ptr)
2909 {
2910         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2911         struct net *net = dev_net(dev);
2912
2913         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2914                 net->ipv6.ip6_null_entry->dst.dev = dev;
2915                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2916 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2917                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2918                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2919                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2920                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2921 #endif
2922         }
2923
2924         return NOTIFY_OK;
2925 }
2926
2927 /*
2928  *      /proc
2929  */
2930
2931 #ifdef CONFIG_PROC_FS
2932
2933 static const struct file_operations ipv6_route_proc_fops = {
2934         .owner          = THIS_MODULE,
2935         .open           = ipv6_route_open,
2936         .read           = seq_read,
2937         .llseek         = seq_lseek,
2938         .release        = seq_release_net,
2939 };
2940
2941 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2942 {
2943         struct net *net = (struct net *)seq->private;
2944         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2945                    net->ipv6.rt6_stats->fib_nodes,
2946                    net->ipv6.rt6_stats->fib_route_nodes,
2947                    net->ipv6.rt6_stats->fib_rt_alloc,
2948                    net->ipv6.rt6_stats->fib_rt_entries,
2949                    net->ipv6.rt6_stats->fib_rt_cache,
2950                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2951                    net->ipv6.rt6_stats->fib_discarded_routes);
2952
2953         return 0;
2954 }
2955
2956 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2957 {
2958         return single_open_net(inode, file, rt6_stats_seq_show);
2959 }
2960
2961 static const struct file_operations rt6_stats_seq_fops = {
2962         .owner   = THIS_MODULE,
2963         .open    = rt6_stats_seq_open,
2964         .read    = seq_read,
2965         .llseek  = seq_lseek,
2966         .release = single_release_net,
2967 };
2968 #endif  /* CONFIG_PROC_FS */
2969
2970 #ifdef CONFIG_SYSCTL
2971
2972 static
2973 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2974                               void __user *buffer, size_t *lenp, loff_t *ppos)
2975 {
2976         struct net *net;
2977         int delay;
2978         if (!write)
2979                 return -EINVAL;
2980
2981         net = (struct net *)ctl->extra1;
2982         delay = net->ipv6.sysctl.flush_delay;
2983         proc_dointvec(ctl, write, buffer, lenp, ppos);
2984         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2985         return 0;
2986 }
2987
2988 struct ctl_table ipv6_route_table_template[] = {
2989         {
2990                 .procname       =       "flush",
2991                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2992                 .maxlen         =       sizeof(int),
2993                 .mode           =       0200,
2994                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2995         },
2996         {
2997                 .procname       =       "gc_thresh",
2998                 .data           =       &ip6_dst_ops_template.gc_thresh,
2999                 .maxlen         =       sizeof(int),
3000                 .mode           =       0644,
3001                 .proc_handler   =       proc_dointvec,
3002         },
3003         {
3004                 .procname       =       "max_size",
3005                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3006                 .maxlen         =       sizeof(int),
3007                 .mode           =       0644,
3008                 .proc_handler   =       proc_dointvec,
3009         },
3010         {
3011                 .procname       =       "gc_min_interval",
3012                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3013                 .maxlen         =       sizeof(int),
3014                 .mode           =       0644,
3015                 .proc_handler   =       proc_dointvec_jiffies,
3016         },
3017         {
3018                 .procname       =       "gc_timeout",
3019                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3020                 .maxlen         =       sizeof(int),
3021                 .mode           =       0644,
3022                 .proc_handler   =       proc_dointvec_jiffies,
3023         },
3024         {
3025                 .procname       =       "gc_interval",
3026                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3027                 .maxlen         =       sizeof(int),
3028                 .mode           =       0644,
3029                 .proc_handler   =       proc_dointvec_jiffies,
3030         },
3031         {
3032                 .procname       =       "gc_elasticity",
3033                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3034                 .maxlen         =       sizeof(int),
3035                 .mode           =       0644,
3036                 .proc_handler   =       proc_dointvec,
3037         },
3038         {
3039                 .procname       =       "mtu_expires",
3040                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3041                 .maxlen         =       sizeof(int),
3042                 .mode           =       0644,
3043                 .proc_handler   =       proc_dointvec_jiffies,
3044         },
3045         {
3046                 .procname       =       "min_adv_mss",
3047                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3048                 .maxlen         =       sizeof(int),
3049                 .mode           =       0644,
3050                 .proc_handler   =       proc_dointvec,
3051         },
3052         {
3053                 .procname       =       "gc_min_interval_ms",
3054                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3055                 .maxlen         =       sizeof(int),
3056                 .mode           =       0644,
3057                 .proc_handler   =       proc_dointvec_ms_jiffies,
3058         },
3059         { }
3060 };
3061
3062 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3063 {
3064         struct ctl_table *table;
3065
3066         table = kmemdup(ipv6_route_table_template,
3067                         sizeof(ipv6_route_table_template),
3068                         GFP_KERNEL);
3069
3070         if (table) {
3071                 table[0].data = &net->ipv6.sysctl.flush_delay;
3072                 table[0].extra1 = net;
3073                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3074                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3075                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3076                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3077                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3078                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3079                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3080                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3081                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3082
3083                 /* Don't export sysctls to unprivileged users */
3084                 if (net->user_ns != &init_user_ns)
3085                         table[0].procname = NULL;
3086         }
3087
3088         return table;
3089 }
3090 #endif
3091
3092 static int __net_init ip6_route_net_init(struct net *net)
3093 {
3094         int ret = -ENOMEM;
3095
3096         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3097                sizeof(net->ipv6.ip6_dst_ops));
3098
3099         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3100                 goto out_ip6_dst_ops;
3101
3102         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3103                                            sizeof(*net->ipv6.ip6_null_entry),
3104                                            GFP_KERNEL);
3105         if (!net->ipv6.ip6_null_entry)
3106                 goto out_ip6_dst_entries;
3107         net->ipv6.ip6_null_entry->dst.path =
3108                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3109         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3110         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3111                          ip6_template_metrics, true);
3112
3113 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3114         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3115                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3116                                                GFP_KERNEL);
3117         if (!net->ipv6.ip6_prohibit_entry)
3118                 goto out_ip6_null_entry;
3119         net->ipv6.ip6_prohibit_entry->dst.path =
3120                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3121         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3122         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3123                          ip6_template_metrics, true);
3124
3125         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3126                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3127                                                GFP_KERNEL);
3128         if (!net->ipv6.ip6_blk_hole_entry)
3129                 goto out_ip6_prohibit_entry;
3130         net->ipv6.ip6_blk_hole_entry->dst.path =
3131                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3132         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3133         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3134                          ip6_template_metrics, true);
3135 #endif
3136
3137         net->ipv6.sysctl.flush_delay = 0;
3138         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3139         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3140         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3141         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3142         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3143         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3144         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3145
3146         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3147
3148         ret = 0;
3149 out:
3150         return ret;
3151
3152 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3153 out_ip6_prohibit_entry:
3154         kfree(net->ipv6.ip6_prohibit_entry);
3155 out_ip6_null_entry:
3156         kfree(net->ipv6.ip6_null_entry);
3157 #endif
3158 out_ip6_dst_entries:
3159         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3160 out_ip6_dst_ops:
3161         goto out;
3162 }
3163
3164 static void __net_exit ip6_route_net_exit(struct net *net)
3165 {
3166         kfree(net->ipv6.ip6_null_entry);
3167 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3168         kfree(net->ipv6.ip6_prohibit_entry);
3169         kfree(net->ipv6.ip6_blk_hole_entry);
3170 #endif
3171         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3172 }
3173
3174 static int __net_init ip6_route_net_init_late(struct net *net)
3175 {
3176 #ifdef CONFIG_PROC_FS
3177         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3178         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3179 #endif
3180         return 0;
3181 }
3182
3183 static void __net_exit ip6_route_net_exit_late(struct net *net)
3184 {
3185 #ifdef CONFIG_PROC_FS
3186         remove_proc_entry("ipv6_route", net->proc_net);
3187         remove_proc_entry("rt6_stats", net->proc_net);
3188 #endif
3189 }
3190
3191 static struct pernet_operations ip6_route_net_ops = {
3192         .init = ip6_route_net_init,
3193         .exit = ip6_route_net_exit,
3194 };
3195
3196 static int __net_init ipv6_inetpeer_init(struct net *net)
3197 {
3198         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3199
3200         if (!bp)
3201                 return -ENOMEM;
3202         inet_peer_base_init(bp);
3203         net->ipv6.peers = bp;
3204         return 0;
3205 }
3206
3207 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3208 {
3209         struct inet_peer_base *bp = net->ipv6.peers;
3210
3211         net->ipv6.peers = NULL;
3212         inetpeer_invalidate_tree(bp);
3213         kfree(bp);
3214 }
3215
3216 static struct pernet_operations ipv6_inetpeer_ops = {
3217         .init   =       ipv6_inetpeer_init,
3218         .exit   =       ipv6_inetpeer_exit,
3219 };
3220
3221 static struct pernet_operations ip6_route_net_late_ops = {
3222         .init = ip6_route_net_init_late,
3223         .exit = ip6_route_net_exit_late,
3224 };
3225
3226 static struct notifier_block ip6_route_dev_notifier = {
3227         .notifier_call = ip6_route_dev_notify,
3228         .priority = 0,
3229 };
3230
3231 int __init ip6_route_init(void)
3232 {
3233         int ret;
3234
3235         ret = -ENOMEM;
3236         ip6_dst_ops_template.kmem_cachep =
3237                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3238                                   SLAB_HWCACHE_ALIGN, NULL);
3239         if (!ip6_dst_ops_template.kmem_cachep)
3240                 goto out;
3241
3242         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3243         if (ret)
3244                 goto out_kmem_cache;
3245
3246         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3247         if (ret)
3248                 goto out_dst_entries;
3249
3250         ret = register_pernet_subsys(&ip6_route_net_ops);
3251         if (ret)
3252                 goto out_register_inetpeer;
3253
3254         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3255
3256         /* Registering of the loopback is done before this portion of code,
3257          * the loopback reference in rt6_info will not be taken, do it
3258          * manually for init_net */
3259         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3260         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3261   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3262         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3263         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3264         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3265         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3266   #endif
3267         ret = fib6_init();
3268         if (ret)
3269                 goto out_register_subsys;
3270
3271         ret = xfrm6_init();
3272         if (ret)
3273                 goto out_fib6_init;
3274
3275         ret = fib6_rules_init();
3276         if (ret)
3277                 goto xfrm6_init;
3278
3279         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3280         if (ret)
3281                 goto fib6_rules_init;
3282
3283         ret = -ENOBUFS;
3284         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3285             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3286             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3287                 goto out_register_late_subsys;
3288
3289         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3290         if (ret)
3291                 goto out_register_late_subsys;
3292
3293 out:
3294         return ret;
3295
3296 out_register_late_subsys:
3297         unregister_pernet_subsys(&ip6_route_net_late_ops);
3298 fib6_rules_init:
3299         fib6_rules_cleanup();
3300 xfrm6_init:
3301         xfrm6_fini();
3302 out_fib6_init:
3303         fib6_gc_cleanup();
3304 out_register_subsys:
3305         unregister_pernet_subsys(&ip6_route_net_ops);
3306 out_register_inetpeer:
3307         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3308 out_dst_entries:
3309         dst_entries_destroy(&ip6_dst_blackhole_ops);
3310 out_kmem_cache:
3311         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3312         goto out;
3313 }
3314
3315 void ip6_route_cleanup(void)
3316 {
3317         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3318         unregister_pernet_subsys(&ip6_route_net_late_ops);
3319         fib6_rules_cleanup();
3320         xfrm6_fini();
3321         fib6_gc_cleanup();
3322         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3323         unregister_pernet_subsys(&ip6_route_net_ops);
3324         dst_entries_destroy(&ip6_dst_blackhole_ops);
3325         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3326 }