commit 3ac19eb81a86ef28f4efd7f11d32a5f6dc57c5af
[cascardo/linux.git] net/ipv6/route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66
67 #include <asm/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
73 enum rt6_nud_state {
74         RT6_NUD_FAIL_HARD = -3,
75         RT6_NUD_FAIL_PROBE = -2,
76         RT6_NUD_FAIL_DO_RR = -1,
77         RT6_NUD_SUCCEED = 1
78 };
79
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int      ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void             ip6_dst_destroy(struct dst_entry *);
86 static void             ip6_dst_ifdown(struct dst_entry *,
87                                        struct net_device *dev, int how);
88 static int               ip6_dst_gc(struct dst_ops *ops);
89
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int              ip6_pkt_prohibit(struct sk_buff *skb);
93 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96                                            struct sk_buff *skb, u32 mtu);
97 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98                                         struct sk_buff *skb);
99 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101
102 #ifdef CONFIG_IPV6_ROUTE_INFO
103 static struct rt6_info *rt6_add_route_info(struct net *net,
104                                            const struct in6_addr *prefix, int prefixlen,
105                                            const struct in6_addr *gwaddr,
106                                            struct net_device *dev,
107                                            unsigned int pref);
108 static struct rt6_info *rt6_get_route_info(struct net *net,
109                                            const struct in6_addr *prefix, int prefixlen,
110                                            const struct in6_addr *gwaddr,
111                                            struct net_device *dev);
112 #endif
113
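/* Per-CPU list of uncached (DST_NOCACHE) routes.  Entries are added in
 * rt6_uncached_list_add() and removed in rt6_uncached_list_del(); when a
 * network device goes away, rt6_uncached_list_flush_dev() walks every
 * CPU's list and re-points the affected routes at the loopback device.
 */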
114 struct uncached_list {
115         spinlock_t              lock;
116         struct list_head        head;
117 };
118
119 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
120
121 static void rt6_uncached_list_add(struct rt6_info *rt)
122 {
123         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
124
125         rt->dst.flags |= DST_NOCACHE;
126         rt->rt6i_uncached_list = ul;
127
128         spin_lock_bh(&ul->lock);
129         list_add_tail(&rt->rt6i_uncached, &ul->head);
130         spin_unlock_bh(&ul->lock);
131 }
132
133 static void rt6_uncached_list_del(struct rt6_info *rt)
134 {
135         if (!list_empty(&rt->rt6i_uncached)) {
136                 struct uncached_list *ul = rt->rt6i_uncached_list;
137
138                 spin_lock_bh(&ul->lock);
139                 list_del(&rt->rt6i_uncached);
140                 spin_unlock_bh(&ul->lock);
141         }
142 }
143
144 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
145 {
146         struct net_device *loopback_dev = net->loopback_dev;
147         int cpu;
148
149         if (dev == loopback_dev)
150                 return;
151
152         for_each_possible_cpu(cpu) {
153                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
154                 struct rt6_info *rt;
155
156                 spin_lock_bh(&ul->lock);
157                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
158                         struct inet6_dev *rt_idev = rt->rt6i_idev;
159                         struct net_device *rt_dev = rt->dst.dev;
160
161                         if (rt_idev->dev == dev) {
162                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
163                                 in6_dev_put(rt_idev);
164                         }
165
166                         if (rt_dev == dev) {
167                                 rt->dst.dev = loopback_dev;
168                                 dev_hold(rt->dst.dev);
169                                 dev_put(rt_dev);
170                         }
171                 }
172                 spin_unlock_bh(&ul->lock);
173         }
174 }
175
176 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
177 {
178         return dst_metrics_write_ptr(rt->dst.from);
179 }
180
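/* Copy-on-write handling for dst metrics: per-CPU (RTF_PCPU) clones write
 * through to the metrics of the route they were copied from (dst.from),
 * RTF_CACHE clones never get writable metrics, and all other routes fall
 * back to the generic copy-on-write path.
 */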
181 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
182 {
183         struct rt6_info *rt = (struct rt6_info *)dst;
184
185         if (rt->rt6i_flags & RTF_PCPU)
186                 return rt6_pcpu_cow_metrics(rt);
187         else if (rt->rt6i_flags & RTF_CACHE)
188                 return NULL;
189         else
190                 return dst_cow_metrics_generic(dst, old);
191 }
192
193 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
194                                              struct sk_buff *skb,
195                                              const void *daddr)
196 {
197         struct in6_addr *p = &rt->rt6i_gateway;
198
199         if (!ipv6_addr_any(p))
200                 return (const void *) p;
201         else if (skb)
202                 return &ipv6_hdr(skb)->daddr;
203         return daddr;
204 }
205
206 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
207                                           struct sk_buff *skb,
208                                           const void *daddr)
209 {
210         struct rt6_info *rt = (struct rt6_info *) dst;
211         struct neighbour *n;
212
213         daddr = choose_neigh_daddr(rt, skb, daddr);
214         n = __ipv6_neigh_lookup(dst->dev, daddr);
215         if (n)
216                 return n;
217         return neigh_create(&nd_tbl, daddr, dst->dev);
218 }
219
220 static struct dst_ops ip6_dst_ops_template = {
221         .family                 =       AF_INET6,
222         .gc                     =       ip6_dst_gc,
223         .gc_thresh              =       1024,
224         .check                  =       ip6_dst_check,
225         .default_advmss         =       ip6_default_advmss,
226         .mtu                    =       ip6_mtu,
227         .cow_metrics            =       ipv6_cow_metrics,
228         .destroy                =       ip6_dst_destroy,
229         .ifdown                 =       ip6_dst_ifdown,
230         .negative_advice        =       ip6_negative_advice,
231         .link_failure           =       ip6_link_failure,
232         .update_pmtu            =       ip6_rt_update_pmtu,
233         .redirect               =       rt6_do_redirect,
234         .local_out              =       __ip6_local_out,
235         .neigh_lookup           =       ip6_neigh_lookup,
236 };
237
238 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
239 {
240         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
241
242         return mtu ? : dst->dev->mtu;
243 }
244
245 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
246                                          struct sk_buff *skb, u32 mtu)
247 {
248 }
249
250 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
251                                       struct sk_buff *skb)
252 {
253 }
254
255 static struct dst_ops ip6_dst_blackhole_ops = {
256         .family                 =       AF_INET6,
257         .destroy                =       ip6_dst_destroy,
258         .check                  =       ip6_dst_check,
259         .mtu                    =       ip6_blackhole_mtu,
260         .default_advmss         =       ip6_default_advmss,
261         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
262         .redirect               =       ip6_rt_blackhole_redirect,
263         .cow_metrics            =       dst_cow_metrics_generic,
264         .neigh_lookup           =       ip6_neigh_lookup,
265 };
266
267 static const u32 ip6_template_metrics[RTAX_MAX] = {
268         [RTAX_HOPLIMIT - 1] = 0,
269 };
270
271 static const struct rt6_info ip6_null_entry_template = {
272         .dst = {
273                 .__refcnt       = ATOMIC_INIT(1),
274                 .__use          = 1,
275                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
276                 .error          = -ENETUNREACH,
277                 .input          = ip6_pkt_discard,
278                 .output         = ip6_pkt_discard_out,
279         },
280         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
281         .rt6i_protocol  = RTPROT_KERNEL,
282         .rt6i_metric    = ~(u32) 0,
283         .rt6i_ref       = ATOMIC_INIT(1),
284 };
285
286 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
287
288 static const struct rt6_info ip6_prohibit_entry_template = {
289         .dst = {
290                 .__refcnt       = ATOMIC_INIT(1),
291                 .__use          = 1,
292                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
293                 .error          = -EACCES,
294                 .input          = ip6_pkt_prohibit,
295                 .output         = ip6_pkt_prohibit_out,
296         },
297         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
298         .rt6i_protocol  = RTPROT_KERNEL,
299         .rt6i_metric    = ~(u32) 0,
300         .rt6i_ref       = ATOMIC_INIT(1),
301 };
302
303 static const struct rt6_info ip6_blk_hole_entry_template = {
304         .dst = {
305                 .__refcnt       = ATOMIC_INIT(1),
306                 .__use          = 1,
307                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
308                 .error          = -EINVAL,
309                 .input          = dst_discard,
310                 .output         = dst_discard_out,
311         },
312         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
313         .rt6i_protocol  = RTPROT_KERNEL,
314         .rt6i_metric    = ~(u32) 0,
315         .rt6i_ref       = ATOMIC_INIT(1),
316 };
317
318 #endif
319
320 static void rt6_info_init(struct rt6_info *rt)
321 {
322         struct dst_entry *dst = &rt->dst;
323
324         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
325         INIT_LIST_HEAD(&rt->rt6i_siblings);
326         INIT_LIST_HEAD(&rt->rt6i_uncached);
327 }
328
329 /* allocate dst with ip6_dst_ops */
330 static struct rt6_info *__ip6_dst_alloc(struct net *net,
331                                         struct net_device *dev,
332                                         int flags)
333 {
334         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
335                                         0, DST_OBSOLETE_FORCE_CHK, flags);
336
337         if (rt)
338                 rt6_info_init(rt);
339
340         return rt;
341 }
342
343 struct rt6_info *ip6_dst_alloc(struct net *net,
344                                struct net_device *dev,
345                                int flags)
346 {
347         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
348
349         if (rt) {
350                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
351                 if (rt->rt6i_pcpu) {
352                         int cpu;
353
354                         for_each_possible_cpu(cpu) {
355                                 struct rt6_info **p;
356
357                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
358                                 /* no one shares rt */
359                                 *p =  NULL;
360                         }
361                 } else {
362                         dst_destroy((struct dst_entry *)rt);
363                         return NULL;
364                 }
365         }
366
367         return rt;
368 }
369 EXPORT_SYMBOL(ip6_dst_alloc);
370
371 static void ip6_dst_destroy(struct dst_entry *dst)
372 {
373         struct rt6_info *rt = (struct rt6_info *)dst;
374         struct dst_entry *from = dst->from;
375         struct inet6_dev *idev;
376
377         dst_destroy_metrics_generic(dst);
378         free_percpu(rt->rt6i_pcpu);
379         rt6_uncached_list_del(rt);
380
381         idev = rt->rt6i_idev;
382         if (idev) {
383                 rt->rt6i_idev = NULL;
384                 in6_dev_put(idev);
385         }
386
387         dst->from = NULL;
388         dst_release(from);
389 }
390
391 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
392                            int how)
393 {
394         struct rt6_info *rt = (struct rt6_info *)dst;
395         struct inet6_dev *idev = rt->rt6i_idev;
396         struct net_device *loopback_dev =
397                 dev_net(dev)->loopback_dev;
398
399         if (dev != loopback_dev) {
400                 if (idev && idev->dev == dev) {
401                         struct inet6_dev *loopback_idev =
402                                 in6_dev_get(loopback_dev);
403                         if (loopback_idev) {
404                                 rt->rt6i_idev = loopback_idev;
405                                 in6_dev_put(idev);
406                         }
407                 }
408         }
409 }
410
411 static bool __rt6_check_expired(const struct rt6_info *rt)
412 {
413         if (rt->rt6i_flags & RTF_EXPIRES)
414                 return time_after(jiffies, rt->dst.expires);
415         else
416                 return false;
417 }
418
419 static bool rt6_check_expired(const struct rt6_info *rt)
420 {
421         if (rt->rt6i_flags & RTF_EXPIRES) {
422                 if (time_after(jiffies, rt->dst.expires))
423                         return true;
424         } else if (rt->dst.from) {
425                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
426         }
427         return false;
428 }
429
430 /* Multipath route selection:
431  *   Hash based function using packet header and flowlabel.
432  * Adapted from fib_info_hashfn()
433  */
434 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
435                                const struct flowi6 *fl6)
436 {
437         return get_hash_from_flowi6(fl6) % candidate_count;
438 }
439
440 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
441                                              struct flowi6 *fl6, int oif,
442                                              int strict)
443 {
444         struct rt6_info *sibling, *next_sibling;
445         int route_chosen;
446
447         route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
448         /* Don't change the route if route_chosen == 0
449          * (the siblings list does not include ourselves)
450          */
451         if (route_chosen)
452                 list_for_each_entry_safe(sibling, next_sibling,
453                                 &match->rt6i_siblings, rt6i_siblings) {
454                         route_chosen--;
455                         if (route_chosen == 0) {
456                                 if (rt6_score_route(sibling, oif, strict) < 0)
457                                         break;
458                                 match = sibling;
459                                 break;
460                         }
461                 }
462         return match;
463 }
464
465 /*
466  *      Route lookup. Any table->tb6_lock is implied.
467  */
468
469 static inline struct rt6_info *rt6_device_match(struct net *net,
470                                                     struct rt6_info *rt,
471                                                     const struct in6_addr *saddr,
472                                                     int oif,
473                                                     int flags)
474 {
475         struct rt6_info *local = NULL;
476         struct rt6_info *sprt;
477
478         if (!oif && ipv6_addr_any(saddr))
479                 goto out;
480
481         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
482                 struct net_device *dev = sprt->dst.dev;
483
484                 if (oif) {
485                         if (dev->ifindex == oif)
486                                 return sprt;
487                         if (dev->flags & IFF_LOOPBACK) {
488                                 if (!sprt->rt6i_idev ||
489                                     sprt->rt6i_idev->dev->ifindex != oif) {
490                                         if (flags & RT6_LOOKUP_F_IFACE)
491                                                 continue;
492                                         if (local &&
493                                             local->rt6i_idev->dev->ifindex == oif)
494                                                 continue;
495                                 }
496                                 local = sprt;
497                         }
498                 } else {
499                         if (ipv6_chk_addr(net, saddr, dev,
500                                           flags & RT6_LOOKUP_F_IFACE))
501                                 return sprt;
502                 }
503         }
504
505         if (oif) {
506                 if (local)
507                         return local;
508
509                 if (flags & RT6_LOOKUP_F_IFACE)
510                         return net->ipv6.ip6_null_entry;
511         }
512 out:
513         return rt;
514 }
515
516 #ifdef CONFIG_IPV6_ROUTER_PREF
517 struct __rt6_probe_work {
518         struct work_struct work;
519         struct in6_addr target;
520         struct net_device *dev;
521 };
522
523 static void rt6_probe_deferred(struct work_struct *w)
524 {
525         struct in6_addr mcaddr;
526         struct __rt6_probe_work *work =
527                 container_of(w, struct __rt6_probe_work, work);
528
529         addrconf_addr_solict_mult(&work->target, &mcaddr);
530         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
531         dev_put(work->dev);
532         kfree(work);
533 }
534
535 static void rt6_probe(struct rt6_info *rt)
536 {
537         struct __rt6_probe_work *work;
538         struct neighbour *neigh;
539         /*
540          * Router Reachability Probing: if the reachability of the
541          * gateway is in doubt, probe it with a Neighbour Solicitation
542          * to check whether it really is reachable.
543          *
544          * Router Reachability Probe MUST be rate-limited
545          * to no more than one per minute.
546          */
547         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
548                 return;
549         rcu_read_lock_bh();
550         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
551         if (neigh) {
552                 if (neigh->nud_state & NUD_VALID)
553                         goto out;
554
555                 work = NULL;
556                 write_lock(&neigh->lock);
557                 if (!(neigh->nud_state & NUD_VALID) &&
558                     time_after(jiffies,
559                                neigh->updated +
560                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
561                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
562                         if (work)
563                                 __neigh_set_probe_once(neigh);
564                 }
565                 write_unlock(&neigh->lock);
566         } else {
567                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
568         }
569
570         if (work) {
571                 INIT_WORK(&work->work, rt6_probe_deferred);
572                 work->target = rt->rt6i_gateway;
573                 dev_hold(rt->dst.dev);
574                 work->dev = rt->dst.dev;
575                 schedule_work(&work->work);
576         }
577
578 out:
579         rcu_read_unlock_bh();
580 }
581 #else
582 static inline void rt6_probe(struct rt6_info *rt)
583 {
584 }
585 #endif
586
587 /*
588  * Default Router Selection (RFC 2461 6.3.6)
589  */
590 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
591 {
592         struct net_device *dev = rt->dst.dev;
593         if (!oif || dev->ifindex == oif)
594                 return 2;
595         if ((dev->flags & IFF_LOOPBACK) &&
596             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
597                 return 1;
598         return 0;
599 }
600
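/* Map the neighbour state of a route's gateway to an rt6_nud_state score.
 * Routes without a gateway (or marked RTF_NONEXTHOP) always succeed.  A
 * missing neighbour entry succeeds when router preference support is
 * enabled (rt6_probe() will resolve it) and otherwise requests the
 * round-robin fallback.
 */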
601 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
602 {
603         struct neighbour *neigh;
604         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
605
606         if (rt->rt6i_flags & RTF_NONEXTHOP ||
607             !(rt->rt6i_flags & RTF_GATEWAY))
608                 return RT6_NUD_SUCCEED;
609
610         rcu_read_lock_bh();
611         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
612         if (neigh) {
613                 read_lock(&neigh->lock);
614                 if (neigh->nud_state & NUD_VALID)
615                         ret = RT6_NUD_SUCCEED;
616 #ifdef CONFIG_IPV6_ROUTER_PREF
617                 else if (!(neigh->nud_state & NUD_FAILED))
618                         ret = RT6_NUD_SUCCEED;
619                 else
620                         ret = RT6_NUD_FAIL_PROBE;
621 #endif
622                 read_unlock(&neigh->lock);
623         } else {
624                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
625                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
626         }
627         rcu_read_unlock_bh();
628
629         return ret;
630 }
631
632 static int rt6_score_route(struct rt6_info *rt, int oif,
633                            int strict)
634 {
635         int m;
636
637         m = rt6_check_dev(rt, oif);
638         if (!m && (strict & RT6_LOOKUP_F_IFACE))
639                 return RT6_NUD_FAIL_HARD;
640 #ifdef CONFIG_IPV6_ROUTER_PREF
641         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
642 #endif
643         if (strict & RT6_LOOKUP_F_REACHABLE) {
644                 int n = rt6_check_neigh(rt);
645                 if (n < 0)
646                         return n;
647         }
648         return m;
649 }
650
651 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
652                                    int *mpri, struct rt6_info *match,
653                                    bool *do_rr)
654 {
655         int m;
656         bool match_do_rr = false;
657         struct inet6_dev *idev = rt->rt6i_idev;
658         struct net_device *dev = rt->dst.dev;
659
660         if (dev && !netif_carrier_ok(dev) &&
661             idev->cnf.ignore_routes_with_linkdown)
662                 goto out;
663
664         if (rt6_check_expired(rt))
665                 goto out;
666
667         m = rt6_score_route(rt, oif, strict);
668         if (m == RT6_NUD_FAIL_DO_RR) {
669                 match_do_rr = true;
670                 m = 0; /* lowest valid score */
671         } else if (m == RT6_NUD_FAIL_HARD) {
672                 goto out;
673         }
674
675         if (strict & RT6_LOOKUP_F_REACHABLE)
676                 rt6_probe(rt);
677
678         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
679         if (m > *mpri) {
680                 *do_rr = match_do_rr;
681                 *mpri = m;
682                 match = rt;
683         }
684 out:
685         return match;
686 }
687
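/* Scan the routes that share the given metric, starting at rr_head and
 * wrapping around to the head of the leaf list, scoring each candidate
 * with find_match().  Routes with a different metric (remembered in
 * 'cont') are only considered if nothing with the requested metric
 * matched.
 */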
688 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
689                                      struct rt6_info *rr_head,
690                                      u32 metric, int oif, int strict,
691                                      bool *do_rr)
692 {
693         struct rt6_info *rt, *match, *cont;
694         int mpri = -1;
695
696         match = NULL;
697         cont = NULL;
698         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
699                 if (rt->rt6i_metric != metric) {
700                         cont = rt;
701                         break;
702                 }
703
704                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
705         }
706
707         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
708                 if (rt->rt6i_metric != metric) {
709                         cont = rt;
710                         break;
711                 }
712
713                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
714         }
715
716         if (match || !cont)
717                 return match;
718
719         for (rt = cont; rt; rt = rt->dst.rt6_next)
720                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
721
722         return match;
723 }
724
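/* Round-robin route selection: pick the best-scoring route among the
 * entries sharing rr_ptr's metric, and advance fn->rr_ptr to the next
 * entry whenever the scoring code asked for round-robin (do_rr), so that
 * later lookups start from a different candidate.
 */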
725 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
726 {
727         struct rt6_info *match, *rt0;
728         struct net *net;
729         bool do_rr = false;
730
731         rt0 = fn->rr_ptr;
732         if (!rt0)
733                 fn->rr_ptr = rt0 = fn->leaf;
734
735         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
736                              &do_rr);
737
738         if (do_rr) {
739                 struct rt6_info *next = rt0->dst.rt6_next;
740
741                 /* no entries matched; do round-robin */
742                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
743                         next = fn->leaf;
744
745                 if (next != rt0)
746                         fn->rr_ptr = next;
747         }
748
749         net = dev_net(rt0->dst.dev);
750         return match ? match : net->ipv6.ip6_null_entry;
751 }
752
753 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
754 {
755         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
756 }
757
758 #ifdef CONFIG_IPV6_ROUTE_INFO
759 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
760                   const struct in6_addr *gwaddr)
761 {
762         struct net *net = dev_net(dev);
763         struct route_info *rinfo = (struct route_info *) opt;
764         struct in6_addr prefix_buf, *prefix;
765         unsigned int pref;
766         unsigned long lifetime;
767         struct rt6_info *rt;
768
769         if (len < sizeof(struct route_info)) {
770                 return -EINVAL;
771         }
772
773         /* Sanity check for prefix_len and length */
774         if (rinfo->length > 3) {
775                 return -EINVAL;
776         } else if (rinfo->prefix_len > 128) {
777                 return -EINVAL;
778         } else if (rinfo->prefix_len > 64) {
779                 if (rinfo->length < 2) {
780                         return -EINVAL;
781                 }
782         } else if (rinfo->prefix_len > 0) {
783                 if (rinfo->length < 1) {
784                         return -EINVAL;
785                 }
786         }
787
788         pref = rinfo->route_pref;
789         if (pref == ICMPV6_ROUTER_PREF_INVALID)
790                 return -EINVAL;
791
792         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
793
794         if (rinfo->length == 3)
795                 prefix = (struct in6_addr *)rinfo->prefix;
796         else {
797                 /* this function is safe */
798                 ipv6_addr_prefix(&prefix_buf,
799                                  (struct in6_addr *)rinfo->prefix,
800                                  rinfo->prefix_len);
801                 prefix = &prefix_buf;
802         }
803
804         if (rinfo->prefix_len == 0)
805                 rt = rt6_get_dflt_router(gwaddr, dev);
806         else
807                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
808                                         gwaddr, dev);
809
810         if (rt && !lifetime) {
811                 ip6_del_rt(rt);
812                 rt = NULL;
813         }
814
815         if (!rt && lifetime)
816                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
817                                         dev, pref);
818         else if (rt)
819                 rt->rt6i_flags = RTF_ROUTEINFO |
820                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
821
822         if (rt) {
823                 if (!addrconf_finite_timeout(lifetime))
824                         rt6_clean_expires(rt);
825                 else
826                         rt6_set_expires(rt, jiffies + HZ * lifetime);
827
828                 ip6_rt_put(rt);
829         }
830         return 0;
831 }
832 #endif
833
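/* Walk back up the fib6 tree after a failed lookup: climb towards the
 * root, descending into a parent's source-address subtree where one
 * exists, until a node that actually carries routes (RTN_RTINFO) is
 * found.  Returns NULL once the tree root is reached.
 */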
834 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
835                                         struct in6_addr *saddr)
836 {
837         struct fib6_node *pn;
838         while (1) {
839                 if (fn->fn_flags & RTN_TL_ROOT)
840                         return NULL;
841                 pn = fn->parent;
842                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
843                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
844                 else
845                         fn = pn;
846                 if (fn->fn_flags & RTN_RTINFO)
847                         return fn;
848         }
849 }
850
851 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
852                                              struct fib6_table *table,
853                                              struct flowi6 *fl6, int flags)
854 {
855         struct fib6_node *fn;
856         struct rt6_info *rt;
857
858         read_lock_bh(&table->tb6_lock);
859         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
860 restart:
861         rt = fn->leaf;
862         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
863         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
864                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
865         if (rt == net->ipv6.ip6_null_entry) {
866                 fn = fib6_backtrack(fn, &fl6->saddr);
867                 if (fn)
868                         goto restart;
869         }
870         dst_use(&rt->dst, jiffies);
871         read_unlock_bh(&table->tb6_lock);
872
873         trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
874
875         return rt;
876
877 }
878
879 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
880                                     int flags)
881 {
882         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
883 }
884 EXPORT_SYMBOL_GPL(ip6_route_lookup);
885
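/* Illustrative usage sketch (not an in-tree caller): rt6_lookup() returns
 * a referenced route or NULL, so a caller drops the reference with
 * ip6_rt_put() once it is done with it:
 *
 *	rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *	if (rt) {
 *		... use rt->dst.dev, rt->rt6i_gateway ...
 *		ip6_rt_put(rt);
 *	}
 */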
886 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
887                             const struct in6_addr *saddr, int oif, int strict)
888 {
889         struct flowi6 fl6 = {
890                 .flowi6_oif = oif,
891                 .daddr = *daddr,
892         };
893         struct dst_entry *dst;
894         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
895
896         if (saddr) {
897                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
898                 flags |= RT6_LOOKUP_F_HAS_SADDR;
899         }
900
901         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
902         if (dst->error == 0)
903                 return (struct rt6_info *) dst;
904
905         dst_release(dst);
906
907         return NULL;
908 }
909 EXPORT_SYMBOL(rt6_lookup);
910
911 /* ip6_ins_rt is called with table->tb6_lock NOT held.
912    It takes the new route entry; if the addition fails for any reason,
913    the route is freed.  In any case, if the caller does not hold a
914    reference, the route may be destroyed.
915  */
916
917 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
918                         struct mx6_config *mxc)
919 {
920         int err;
921         struct fib6_table *table;
922
923         table = rt->rt6i_table;
924         write_lock_bh(&table->tb6_lock);
925         err = fib6_add(&table->tb6_root, rt, info, mxc);
926         write_unlock_bh(&table->tb6_lock);
927
928         return err;
929 }
930
931 int ip6_ins_rt(struct rt6_info *rt)
932 {
933         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
934         struct mx6_config mxc = { .mx = NULL, };
935
936         return __ip6_ins_rt(rt, &info, &mxc);
937 }
938
939 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
940                                            const struct in6_addr *daddr,
941                                            const struct in6_addr *saddr)
942 {
943         struct rt6_info *rt;
944
945         /*
946          *      Clone the route.
947          */
948
949         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
950                 ort = (struct rt6_info *)ort->dst.from;
951
952         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
953
954         if (!rt)
955                 return NULL;
956
957         ip6_rt_copy_init(rt, ort);
958         rt->rt6i_flags |= RTF_CACHE;
959         rt->rt6i_metric = 0;
960         rt->dst.flags |= DST_HOST;
961         rt->rt6i_dst.addr = *daddr;
962         rt->rt6i_dst.plen = 128;
963
964         if (!rt6_is_gw_or_nonexthop(ort)) {
965                 if (ort->rt6i_dst.plen != 128 &&
966                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
967                         rt->rt6i_flags |= RTF_ANYCAST;
968 #ifdef CONFIG_IPV6_SUBTREES
969                 if (rt->rt6i_src.plen && saddr) {
970                         rt->rt6i_src.addr = *saddr;
971                         rt->rt6i_src.plen = 128;
972                 }
973 #endif
974         }
975
976         return rt;
977 }
978
979 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
980 {
981         struct rt6_info *pcpu_rt;
982
983         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
984                                   rt->dst.dev, rt->dst.flags);
985
986         if (!pcpu_rt)
987                 return NULL;
988         ip6_rt_copy_init(pcpu_rt, rt);
989         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
990         pcpu_rt->rt6i_flags |= RTF_PCPU;
991         return pcpu_rt;
992 }
993
994 /* It should be called with read_lock_bh(&tb6_lock) acquired */
995 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
996 {
997         struct rt6_info *pcpu_rt, **p;
998
999         p = this_cpu_ptr(rt->rt6i_pcpu);
1000         pcpu_rt = *p;
1001
1002         if (pcpu_rt) {
1003                 dst_hold(&pcpu_rt->dst);
1004                 rt6_dst_from_metrics_check(pcpu_rt);
1005         }
1006         return pcpu_rt;
1007 }
1008
1009 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1010 {
1011         struct fib6_table *table = rt->rt6i_table;
1012         struct rt6_info *pcpu_rt, *prev, **p;
1013
1014         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1015         if (!pcpu_rt) {
1016                 struct net *net = dev_net(rt->dst.dev);
1017
1018                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1019                 return net->ipv6.ip6_null_entry;
1020         }
1021
1022         read_lock_bh(&table->tb6_lock);
1023         if (rt->rt6i_pcpu) {
1024                 p = this_cpu_ptr(rt->rt6i_pcpu);
1025                 prev = cmpxchg(p, NULL, pcpu_rt);
1026                 if (prev) {
1027                         /* If someone did it before us, return prev instead */
1028                         dst_destroy(&pcpu_rt->dst);
1029                         pcpu_rt = prev;
1030                 }
1031         } else {
1032                 /* rt has been removed from the fib6 tree
1033                  * before we had a chance to acquire the read_lock.
1034                  * In this case, don't bother to create a pcpu rt
1035                  * since rt is going away anyway.  The next
1036                  * dst_check() will trigger a re-lookup.
1037                  */
1038                 dst_destroy(&pcpu_rt->dst);
1039                 pcpu_rt = rt;
1040         }
1041         dst_hold(&pcpu_rt->dst);
1042         rt6_dst_from_metrics_check(pcpu_rt);
1043         read_unlock_bh(&table->tb6_lock);
1044         return pcpu_rt;
1045 }
1046
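/* Core policy-routing lookup.  Three outcomes are possible:
 *   - the null entry or an RTF_CACHE route is returned directly, with a
 *     reference taken while tb6_lock is still held;
 *   - for FLOWI_FLAG_KNOWN_NH lookups on routes without a gateway, an
 *     uncached RTF_CACHE clone is created and put on the uncached list;
 *   - otherwise a per-CPU copy of the route is returned, creating it via
 *     rt6_make_pcpu_route() if this CPU does not have one yet.
 */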
1047 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1048                                int oif, struct flowi6 *fl6, int flags)
1049 {
1050         struct fib6_node *fn, *saved_fn;
1051         struct rt6_info *rt;
1052         int strict = 0;
1053
1054         strict |= flags & RT6_LOOKUP_F_IFACE;
1055         if (net->ipv6.devconf_all->forwarding == 0)
1056                 strict |= RT6_LOOKUP_F_REACHABLE;
1057
1058         read_lock_bh(&table->tb6_lock);
1059
1060         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1061         saved_fn = fn;
1062
1063         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1064                 oif = 0;
1065
1066 redo_rt6_select:
1067         rt = rt6_select(fn, oif, strict);
1068         if (rt->rt6i_nsiblings)
1069                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1070         if (rt == net->ipv6.ip6_null_entry) {
1071                 fn = fib6_backtrack(fn, &fl6->saddr);
1072                 if (fn)
1073                         goto redo_rt6_select;
1074                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1075                         /* also consider unreachable route */
1076                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1077                         fn = saved_fn;
1078                         goto redo_rt6_select;
1079                 }
1080         }
1081
1082
1083         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1084                 dst_use(&rt->dst, jiffies);
1085                 read_unlock_bh(&table->tb6_lock);
1086
1087                 rt6_dst_from_metrics_check(rt);
1088
1089                 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1090                 return rt;
1091         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1092                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1093                 /* Create a RTF_CACHE clone which will not be
1094                  * owned by the fib6 tree.  It is for the special case where
1095                  * the daddr in the skb during the neighbor look-up is different
1096                  * from the fl6->daddr used to look up the route here.
1097                  */
1098
1099                 struct rt6_info *uncached_rt;
1100
1101                 dst_use(&rt->dst, jiffies);
1102                 read_unlock_bh(&table->tb6_lock);
1103
1104                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1105                 dst_release(&rt->dst);
1106
1107                 if (uncached_rt)
1108                         rt6_uncached_list_add(uncached_rt);
1109                 else
1110                         uncached_rt = net->ipv6.ip6_null_entry;
1111
1112                 dst_hold(&uncached_rt->dst);
1113
1114                 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1115                 return uncached_rt;
1116
1117         } else {
1118                 /* Get a percpu copy */
1119
1120                 struct rt6_info *pcpu_rt;
1121
1122                 rt->dst.lastuse = jiffies;
1123                 rt->dst.__use++;
1124                 pcpu_rt = rt6_get_pcpu_route(rt);
1125
1126                 if (pcpu_rt) {
1127                         read_unlock_bh(&table->tb6_lock);
1128                 } else {
1129                         /* We have to do the read_unlock first
1130                          * because rt6_make_pcpu_route() may trigger
1131                          * ip6_dst_gc() which will take the write_lock.
1132                          */
1133                         dst_hold(&rt->dst);
1134                         read_unlock_bh(&table->tb6_lock);
1135                         pcpu_rt = rt6_make_pcpu_route(rt);
1136                         dst_release(&rt->dst);
1137                 }
1138
1139                 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1140                 return pcpu_rt;
1141
1142         }
1143 }
1144 EXPORT_SYMBOL_GPL(ip6_pol_route);
1145
1146 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1147                                             struct flowi6 *fl6, int flags)
1148 {
1149         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1150 }
1151
1152 struct dst_entry *ip6_route_input_lookup(struct net *net,
1153                                          struct net_device *dev,
1154                                          struct flowi6 *fl6, int flags)
1155 {
1156         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1157                 flags |= RT6_LOOKUP_F_IFACE;
1158
1159         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1160 }
1161 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1162
1163 void ip6_route_input(struct sk_buff *skb)
1164 {
1165         const struct ipv6hdr *iph = ipv6_hdr(skb);
1166         struct net *net = dev_net(skb->dev);
1167         int flags = RT6_LOOKUP_F_HAS_SADDR;
1168         struct ip_tunnel_info *tun_info;
1169         struct flowi6 fl6 = {
1170                 .flowi6_iif = skb->dev->ifindex,
1171                 .daddr = iph->daddr,
1172                 .saddr = iph->saddr,
1173                 .flowlabel = ip6_flowinfo(iph),
1174                 .flowi6_mark = skb->mark,
1175                 .flowi6_proto = iph->nexthdr,
1176         };
1177
1178         tun_info = skb_tunnel_info(skb);
1179         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1180                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1181         skb_dst_drop(skb);
1182         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1183 }
1184
1185 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1186                                              struct flowi6 *fl6, int flags)
1187 {
1188         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1189 }
1190
1191 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1192                                          struct flowi6 *fl6, int flags)
1193 {
1194         bool any_src;
1195
1196         if (rt6_need_strict(&fl6->daddr)) {
1197                 struct dst_entry *dst;
1198
1199                 dst = l3mdev_link_scope_lookup(net, fl6);
1200                 if (dst)
1201                         return dst;
1202         }
1203
1204         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1205
1206         any_src = ipv6_addr_any(&fl6->saddr);
1207         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1208             (fl6->flowi6_oif && any_src))
1209                 flags |= RT6_LOOKUP_F_IFACE;
1210
1211         if (!any_src)
1212                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1213         else if (sk)
1214                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1215
1216         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1217 }
1218 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1219
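/* Create a standalone copy of a route backed by ip6_dst_blackhole_ops:
 * the copy keeps the original's metrics, device and addressing, but its
 * input/output handlers simply discard packets.  The reference on
 * dst_orig is dropped before returning.
 */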
1220 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1221 {
1222         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1223         struct dst_entry *new = NULL;
1224
1225         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1226         if (rt) {
1227                 rt6_info_init(rt);
1228
1229                 new = &rt->dst;
1230                 new->__use = 1;
1231                 new->input = dst_discard;
1232                 new->output = dst_discard_out;
1233
1234                 dst_copy_metrics(new, &ort->dst);
1235                 rt->rt6i_idev = ort->rt6i_idev;
1236                 if (rt->rt6i_idev)
1237                         in6_dev_hold(rt->rt6i_idev);
1238
1239                 rt->rt6i_gateway = ort->rt6i_gateway;
1240                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1241                 rt->rt6i_metric = 0;
1242
1243                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1244 #ifdef CONFIG_IPV6_SUBTREES
1245                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1246 #endif
1247
1248                 dst_free(new);
1249         }
1250
1251         dst_release(dst_orig);
1252         return new ? new : ERR_PTR(-ENOMEM);
1253 }
1254
1255 /*
1256  *      Destination cache support functions
1257  */
1258
1259 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1260 {
1261         if (rt->dst.from &&
1262             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1263                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1264 }
1265
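/* Validate a fib-owned route against the caller's cookie: the route must
 * still be attached to a fib6 node whose serial number matches the cookie
 * and it must not have expired.  Clones are validated indirectly through
 * the route they were copied from, see rt6_dst_from_check().
 */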
1266 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1267 {
1268         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1269                 return NULL;
1270
1271         if (rt6_check_expired(rt))
1272                 return NULL;
1273
1274         return &rt->dst;
1275 }
1276
1277 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1278 {
1279         if (!__rt6_check_expired(rt) &&
1280             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1281             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1282                 return &rt->dst;
1283         else
1284                 return NULL;
1285 }
1286
1287 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1288 {
1289         struct rt6_info *rt;
1290
1291         rt = (struct rt6_info *) dst;
1292
1293         /* All IPv6 dsts are created with ->obsolete set to
1294          * DST_OBSOLETE_FORCE_CHK, which forces validation calls
1295          * down into this function in all cases.
1296          */
1297
1298         rt6_dst_from_metrics_check(rt);
1299
1300         if (rt->rt6i_flags & RTF_PCPU ||
1301             (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1302                 return rt6_dst_from_check(rt, cookie);
1303         else
1304                 return rt6_check(rt, cookie);
1305 }
1306
1307 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1308 {
1309         struct rt6_info *rt = (struct rt6_info *) dst;
1310
1311         if (rt) {
1312                 if (rt->rt6i_flags & RTF_CACHE) {
1313                         if (rt6_check_expired(rt)) {
1314                                 ip6_del_rt(rt);
1315                                 dst = NULL;
1316                         }
1317                 } else {
1318                         dst_release(dst);
1319                         dst = NULL;
1320                 }
1321         }
1322         return dst;
1323 }
1324
1325 static void ip6_link_failure(struct sk_buff *skb)
1326 {
1327         struct rt6_info *rt;
1328
1329         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1330
1331         rt = (struct rt6_info *) skb_dst(skb);
1332         if (rt) {
1333                 if (rt->rt6i_flags & RTF_CACHE) {
1334                         dst_hold(&rt->dst);
1335                         ip6_del_rt(rt);
1336                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1337                         rt->rt6i_node->fn_sernum = -1;
1338                 }
1339         }
1340 }
1341
1342 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1343 {
1344         struct net *net = dev_net(rt->dst.dev);
1345
1346         rt->rt6i_flags |= RTF_MODIFIED;
1347         rt->rt6i_pmtu = mtu;
1348         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1349 }
1350
1351 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1352 {
1353         return !(rt->rt6i_flags & RTF_CACHE) &&
1354                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1355 }
1356
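/* Apply a learned path MTU.  Routes that are already RTF_CACHE clones
 * (or no longer in the tree) are updated in place; for fib-owned or
 * per-CPU routes an RTF_CACHE clone carrying the new MTU is created and
 * inserted instead, keyed on the packet's or the socket's addresses.
 */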
1357 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1358                                  const struct ipv6hdr *iph, u32 mtu)
1359 {
1360         struct rt6_info *rt6 = (struct rt6_info *)dst;
1361
1362         if (rt6->rt6i_flags & RTF_LOCAL)
1363                 return;
1364
1365         dst_confirm(dst);
1366         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1367         if (mtu >= dst_mtu(dst))
1368                 return;
1369
1370         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1371                 rt6_do_update_pmtu(rt6, mtu);
1372         } else {
1373                 const struct in6_addr *daddr, *saddr;
1374                 struct rt6_info *nrt6;
1375
1376                 if (iph) {
1377                         daddr = &iph->daddr;
1378                         saddr = &iph->saddr;
1379                 } else if (sk) {
1380                         daddr = &sk->sk_v6_daddr;
1381                         saddr = &inet6_sk(sk)->saddr;
1382                 } else {
1383                         return;
1384                 }
1385                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1386                 if (nrt6) {
1387                         rt6_do_update_pmtu(nrt6, mtu);
1388
1389                         /* ip6_ins_rt(nrt6) will bump the
1390                          * rt6->rt6i_node->fn_sernum
1391                          * which will cause the next rt6_check() to fail
1392                          * and invalidate the sk->sk_dst_cache.
1393                          */
1394                         ip6_ins_rt(nrt6);
1395                 }
1396         }
1397 }
1398
1399 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1400                                struct sk_buff *skb, u32 mtu)
1401 {
1402         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1403 }
1404
1405 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1406                      int oif, u32 mark)
1407 {
1408         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1409         struct dst_entry *dst;
1410         struct flowi6 fl6;
1411
1412         memset(&fl6, 0, sizeof(fl6));
1413         fl6.flowi6_oif = oif;
1414         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1415         fl6.daddr = iph->daddr;
1416         fl6.saddr = iph->saddr;
1417         fl6.flowlabel = ip6_flowinfo(iph);
1418
1419         dst = ip6_route_output(net, NULL, &fl6);
1420         if (!dst->error)
1421                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1422         dst_release(dst);
1423 }
1424 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1425
1426 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1427 {
1428         struct dst_entry *dst;
1429
1430         ip6_update_pmtu(skb, sock_net(sk), mtu,
1431                         sk->sk_bound_dev_if, sk->sk_mark);
1432
1433         dst = __sk_dst_get(sk);
1434         if (!dst || !dst->obsolete ||
1435             dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1436                 return;
1437
1438         bh_lock_sock(sk);
1439         if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1440                 ip6_datagram_dst_update(sk, false);
1441         bh_unlock_sock(sk);
1442 }
1443 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1444
1445 /* Handle redirects */
1446 struct ip6rd_flowi {
1447         struct flowi6 fl6;
1448         struct in6_addr gateway;
1449 };
1450
1451 static struct rt6_info *__ip6_route_redirect(struct net *net,
1452                                              struct fib6_table *table,
1453                                              struct flowi6 *fl6,
1454                                              int flags)
1455 {
1456         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1457         struct rt6_info *rt;
1458         struct fib6_node *fn;
1459
1460         /* Get the "current" route for this destination and
1461          * check if the redirect has come from approriate router.
1462          *
1463          * RFC 4861 specifies that redirects should only be
1464          * accepted if they come from the nexthop to the target.
1465          * Due to the way the routes are chosen, this notion
1466          * is a bit fuzzy and one might need to check all possible
1467          * routes.
1468          */
1469
1470         read_lock_bh(&table->tb6_lock);
1471         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1472 restart:
1473         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1474                 if (rt6_check_expired(rt))
1475                         continue;
1476                 if (rt->dst.error)
1477                         break;
1478                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1479                         continue;
1480                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1481                         continue;
1482                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1483                         continue;
1484                 break;
1485         }
1486
1487         if (!rt)
1488                 rt = net->ipv6.ip6_null_entry;
1489         else if (rt->dst.error) {
1490                 rt = net->ipv6.ip6_null_entry;
1491                 goto out;
1492         }
1493
1494         if (rt == net->ipv6.ip6_null_entry) {
1495                 fn = fib6_backtrack(fn, &fl6->saddr);
1496                 if (fn)
1497                         goto restart;
1498         }
1499
1500 out:
1501         dst_hold(&rt->dst);
1502
1503         read_unlock_bh(&table->tb6_lock);
1504
1505         trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1506         return rt;
1507 };
1508
1509 static struct dst_entry *ip6_route_redirect(struct net *net,
1510                                         const struct flowi6 *fl6,
1511                                         const struct in6_addr *gateway)
1512 {
1513         int flags = RT6_LOOKUP_F_HAS_SADDR;
1514         struct ip6rd_flowi rdfl;
1515
1516         rdfl.fl6 = *fl6;
1517         rdfl.gateway = *gateway;
1518
1519         return fib6_rule_lookup(net, &rdfl.fl6,
1520                                 flags, __ip6_route_redirect);
1521 }
1522
1523 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1524 {
1525         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1526         struct dst_entry *dst;
1527         struct flowi6 fl6;
1528
1529         memset(&fl6, 0, sizeof(fl6));
1530         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1531         fl6.flowi6_oif = oif;
1532         fl6.flowi6_mark = mark;
1533         fl6.daddr = iph->daddr;
1534         fl6.saddr = iph->saddr;
1535         fl6.flowlabel = ip6_flowinfo(iph);
1536
1537         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1538         rt6_do_redirect(dst, NULL, skb);
1539         dst_release(dst);
1540 }
1541 EXPORT_SYMBOL_GPL(ip6_redirect);
1542
1543 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1544                             u32 mark)
1545 {
1546         const struct ipv6hdr *iph = ipv6_hdr(skb);
1547         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1548         struct dst_entry *dst;
1549         struct flowi6 fl6;
1550
1551         memset(&fl6, 0, sizeof(fl6));
1552         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1553         fl6.flowi6_oif = oif;
1554         fl6.flowi6_mark = mark;
1555         fl6.daddr = msg->dest;
1556         fl6.saddr = iph->daddr;
1557
1558         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1559         rt6_do_redirect(dst, NULL, skb);
1560         dst_release(dst);
1561 }
1562
1563 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1564 {
1565         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1566 }
1567 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1568
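/* Default advertised MSS for this dst: the path MTU minus the IPv6 and TCP
 * headers, clamped below by the ip6_rt_min_advmss sysctl.
 */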
1569 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1570 {
1571         struct net_device *dev = dst->dev;
1572         unsigned int mtu = dst_mtu(dst);
1573         struct net *net = dev_net(dev);
1574
1575         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1576
1577         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1578                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1579
1580         /*
1581          * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and the
1582          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1583          * IPV6_MAXPLEN is also valid and means: "any MSS,
1584          * rely only on PMTU discovery".
1585          */
1586         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1587                 mtu = IPV6_MAXPLEN;
1588         return mtu;
1589 }
1590
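/* Effective MTU for this dst: prefer the cached path MTU (rt6i_pmtu), then
 * the RTAX_MTU metric, then the device's mtu6 (falling back to IPV6_MIN_MTU),
 * capped at IP6_MAX_MTU and reduced by any lwtunnel encapsulation headroom.
 */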
1591 static unsigned int ip6_mtu(const struct dst_entry *dst)
1592 {
1593         const struct rt6_info *rt = (const struct rt6_info *)dst;
1594         unsigned int mtu = rt->rt6i_pmtu;
1595         struct inet6_dev *idev;
1596
1597         if (mtu)
1598                 goto out;
1599
1600         mtu = dst_metric_raw(dst, RTAX_MTU);
1601         if (mtu)
1602                 goto out;
1603
1604         mtu = IPV6_MIN_MTU;
1605
1606         rcu_read_lock();
1607         idev = __in6_dev_get(dst->dev);
1608         if (idev)
1609                 mtu = idev->cnf.mtu6;
1610         rcu_read_unlock();
1611
1612 out:
1613         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1614
1615         return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1616 }
1617
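/* dst entries allocated by icmp6_dst_alloc() are not inserted into the FIB;
 * they are chained on icmp6_dst_gc_list under icmp6_dst_lock and reclaimed by
 * icmp6_dst_gc() once unreferenced, or by icmp6_clean_all() on events such as
 * device removal.
 */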
1618 static struct dst_entry *icmp6_dst_gc_list;
1619 static DEFINE_SPINLOCK(icmp6_dst_lock);
1620
1621 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1622                                   struct flowi6 *fl6)
1623 {
1624         struct dst_entry *dst;
1625         struct rt6_info *rt;
1626         struct inet6_dev *idev = in6_dev_get(dev);
1627         struct net *net = dev_net(dev);
1628
1629         if (unlikely(!idev))
1630                 return ERR_PTR(-ENODEV);
1631
1632         rt = ip6_dst_alloc(net, dev, 0);
1633         if (unlikely(!rt)) {
1634                 in6_dev_put(idev);
1635                 dst = ERR_PTR(-ENOMEM);
1636                 goto out;
1637         }
1638
1639         rt->dst.flags |= DST_HOST;
1640         rt->dst.output  = ip6_output;
1641         atomic_set(&rt->dst.__refcnt, 1);
1642         rt->rt6i_gateway  = fl6->daddr;
1643         rt->rt6i_dst.addr = fl6->daddr;
1644         rt->rt6i_dst.plen = 128;
1645         rt->rt6i_idev     = idev;
1646         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1647
1648         spin_lock_bh(&icmp6_dst_lock);
1649         rt->dst.next = icmp6_dst_gc_list;
1650         icmp6_dst_gc_list = &rt->dst;
1651         spin_unlock_bh(&icmp6_dst_lock);
1652
1653         fib6_force_start_gc(net);
1654
1655         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1656
1657 out:
1658         return dst;
1659 }
1660
1661 int icmp6_dst_gc(void)
1662 {
1663         struct dst_entry *dst, **pprev;
1664         int more = 0;
1665
1666         spin_lock_bh(&icmp6_dst_lock);
1667         pprev = &icmp6_dst_gc_list;
1668
1669         while ((dst = *pprev) != NULL) {
1670                 if (!atomic_read(&dst->__refcnt)) {
1671                         *pprev = dst->next;
1672                         dst_free(dst);
1673                 } else {
1674                         pprev = &dst->next;
1675                         ++more;
1676                 }
1677         }
1678
1679         spin_unlock_bh(&icmp6_dst_lock);
1680
1681         return more;
1682 }
1683
1684 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1685                             void *arg)
1686 {
1687         struct dst_entry *dst, **pprev;
1688
1689         spin_lock_bh(&icmp6_dst_lock);
1690         pprev = &icmp6_dst_gc_list;
1691         while ((dst = *pprev) != NULL) {
1692                 struct rt6_info *rt = (struct rt6_info *) dst;
1693                 if (func(rt, arg)) {
1694                         *pprev = dst->next;
1695                         dst_free(dst);
1696                 } else {
1697                         pprev = &dst->next;
1698                 }
1699         }
1700         spin_unlock_bh(&icmp6_dst_lock);
1701 }
1702
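/* dst garbage collection, invoked through dst_ops.gc once the number of
 * cached entries exceeds gc_thresh. The run is skipped if the previous one
 * was less than ip6_rt_gc_min_interval ago and we are still within
 * ip6_rt_max_size; otherwise fib6_run_gc() is invoked with an expiry that
 * grows on each forced pass and decays according to ip6_rt_gc_elasticity.
 * Returns nonzero while the cache is still above ip6_rt_max_size.
 */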
1703 static int ip6_dst_gc(struct dst_ops *ops)
1704 {
1705         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1706         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1707         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1708         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1709         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1710         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1711         int entries;
1712
1713         entries = dst_entries_get_fast(ops);
1714         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1715             entries <= rt_max_size)
1716                 goto out;
1717
1718         net->ipv6.ip6_rt_gc_expire++;
1719         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1720         entries = dst_entries_get_slow(ops);
1721         if (entries < ops->gc_thresh)
1722                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1723 out:
1724         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1725         return entries > rt_max_size;
1726 }
1727
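/* Convert the RTA_METRICS attributes in cfg->fc_mx into an RTAX_MAX-sized
 * array for mx6_config: RTAX_CC_ALGO is given by name and translated with
 * tcp_ca_get_key_by_name(), RTAX_HOPLIMIT is capped at 255 and unknown
 * RTAX_FEATURES bits are rejected. The caller must kfree() mxc->mx.
 */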
1728 static int ip6_convert_metrics(struct mx6_config *mxc,
1729                                const struct fib6_config *cfg)
1730 {
1731         bool ecn_ca = false;
1732         struct nlattr *nla;
1733         int remaining;
1734         u32 *mp;
1735
1736         if (!cfg->fc_mx)
1737                 return 0;
1738
1739         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1740         if (unlikely(!mp))
1741                 return -ENOMEM;
1742
1743         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1744                 int type = nla_type(nla);
1745                 u32 val;
1746
1747                 if (!type)
1748                         continue;
1749                 if (unlikely(type > RTAX_MAX))
1750                         goto err;
1751
1752                 if (type == RTAX_CC_ALGO) {
1753                         char tmp[TCP_CA_NAME_MAX];
1754
1755                         nla_strlcpy(tmp, nla, sizeof(tmp));
1756                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1757                         if (val == TCP_CA_UNSPEC)
1758                                 goto err;
1759                 } else {
1760                         val = nla_get_u32(nla);
1761                 }
1762                 if (type == RTAX_HOPLIMIT && val > 255)
1763                         val = 255;
1764                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1765                         goto err;
1766
1767                 mp[type - 1] = val;
1768                 __set_bit(type - 1, mxc->mx_valid);
1769         }
1770
1771         if (ecn_ca) {
1772                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1773                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1774         }
1775
1776         mxc->mx = mp;
1777         return 0;
1778  err:
1779         kfree(mp);
1780         return -EINVAL;
1781 }
1782
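/* Resolve the nexthop gateway of a route being added, restricted to the
 * table named in the config. Returns NULL if that table does not exist or
 * the lookup only reaches the null entry, so the caller can fall back to a
 * full rt6_lookup().
 */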
1783 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1784                                             struct fib6_config *cfg,
1785                                             const struct in6_addr *gw_addr)
1786 {
1787         struct flowi6 fl6 = {
1788                 .flowi6_oif = cfg->fc_ifindex,
1789                 .daddr = *gw_addr,
1790                 .saddr = cfg->fc_prefsrc,
1791         };
1792         struct fib6_table *table;
1793         struct rt6_info *rt;
1794         int flags = RT6_LOOKUP_F_IFACE;
1795
1796         table = fib6_get_table(net, cfg->fc_table);
1797         if (!table)
1798                 return NULL;
1799
1800         if (!ipv6_addr_any(&cfg->fc_prefsrc))
1801                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1802
1803         rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1804
1805         /* if table lookup failed, fall back to full lookup */
1806         if (rt == net->ipv6.ip6_null_entry) {
1807                 ip6_rt_put(rt);
1808                 rt = NULL;
1809         }
1810
1811         return rt;
1812 }
1813
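/* Build (but do not insert) an rt6_info from a fib6_config: resolve the
 * output device and table, validate any gateway, attach lwtunnel state and
 * turn loopback/reject routes into the matching error destinations. The
 * caller inserts the result (see ip6_route_add()) or frees it on error.
 */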
1814 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1815 {
1816         struct net *net = cfg->fc_nlinfo.nl_net;
1817         struct rt6_info *rt = NULL;
1818         struct net_device *dev = NULL;
1819         struct inet6_dev *idev = NULL;
1820         struct fib6_table *table;
1821         int addr_type;
1822         int err = -EINVAL;
1823
1824         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1825                 goto out;
1826 #ifndef CONFIG_IPV6_SUBTREES
1827         if (cfg->fc_src_len)
1828                 goto out;
1829 #endif
1830         if (cfg->fc_ifindex) {
1831                 err = -ENODEV;
1832                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1833                 if (!dev)
1834                         goto out;
1835                 idev = in6_dev_get(dev);
1836                 if (!idev)
1837                         goto out;
1838         }
1839
1840         if (cfg->fc_metric == 0)
1841                 cfg->fc_metric = IP6_RT_PRIO_USER;
1842
1843         err = -ENOBUFS;
1844         if (cfg->fc_nlinfo.nlh &&
1845             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1846                 table = fib6_get_table(net, cfg->fc_table);
1847                 if (!table) {
1848                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1849                         table = fib6_new_table(net, cfg->fc_table);
1850                 }
1851         } else {
1852                 table = fib6_new_table(net, cfg->fc_table);
1853         }
1854
1855         if (!table)
1856                 goto out;
1857
1858         rt = ip6_dst_alloc(net, NULL,
1859                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1860
1861         if (!rt) {
1862                 err = -ENOMEM;
1863                 goto out;
1864         }
1865
1866         if (cfg->fc_flags & RTF_EXPIRES)
1867                 rt6_set_expires(rt, jiffies +
1868                                 clock_t_to_jiffies(cfg->fc_expires));
1869         else
1870                 rt6_clean_expires(rt);
1871
1872         if (cfg->fc_protocol == RTPROT_UNSPEC)
1873                 cfg->fc_protocol = RTPROT_BOOT;
1874         rt->rt6i_protocol = cfg->fc_protocol;
1875
1876         addr_type = ipv6_addr_type(&cfg->fc_dst);
1877
1878         if (addr_type & IPV6_ADDR_MULTICAST)
1879                 rt->dst.input = ip6_mc_input;
1880         else if (cfg->fc_flags & RTF_LOCAL)
1881                 rt->dst.input = ip6_input;
1882         else
1883                 rt->dst.input = ip6_forward;
1884
1885         rt->dst.output = ip6_output;
1886
1887         if (cfg->fc_encap) {
1888                 struct lwtunnel_state *lwtstate;
1889
1890                 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1891                                            cfg->fc_encap, AF_INET6, cfg,
1892                                            &lwtstate);
1893                 if (err)
1894                         goto out;
1895                 rt->dst.lwtstate = lwtstate_get(lwtstate);
1896                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1897                         rt->dst.lwtstate->orig_output = rt->dst.output;
1898                         rt->dst.output = lwtunnel_output;
1899                 }
1900                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1901                         rt->dst.lwtstate->orig_input = rt->dst.input;
1902                         rt->dst.input = lwtunnel_input;
1903                 }
1904         }
1905
1906         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1907         rt->rt6i_dst.plen = cfg->fc_dst_len;
1908         if (rt->rt6i_dst.plen == 128)
1909                 rt->dst.flags |= DST_HOST;
1910
1911 #ifdef CONFIG_IPV6_SUBTREES
1912         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1913         rt->rt6i_src.plen = cfg->fc_src_len;
1914 #endif
1915
1916         rt->rt6i_metric = cfg->fc_metric;
1917
1918         /* We cannot add true routes via loopback here, as they would
1919            result in kernel looping; promote them to reject routes
1920          */
1921         if ((cfg->fc_flags & RTF_REJECT) ||
1922             (dev && (dev->flags & IFF_LOOPBACK) &&
1923              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1924              !(cfg->fc_flags & RTF_LOCAL))) {
1925                 /* hold loopback dev/idev if we haven't done so. */
1926                 if (dev != net->loopback_dev) {
1927                         if (dev) {
1928                                 dev_put(dev);
1929                                 in6_dev_put(idev);
1930                         }
1931                         dev = net->loopback_dev;
1932                         dev_hold(dev);
1933                         idev = in6_dev_get(dev);
1934                         if (!idev) {
1935                                 err = -ENODEV;
1936                                 goto out;
1937                         }
1938                 }
1939                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1940                 switch (cfg->fc_type) {
1941                 case RTN_BLACKHOLE:
1942                         rt->dst.error = -EINVAL;
1943                         rt->dst.output = dst_discard_out;
1944                         rt->dst.input = dst_discard;
1945                         break;
1946                 case RTN_PROHIBIT:
1947                         rt->dst.error = -EACCES;
1948                         rt->dst.output = ip6_pkt_prohibit_out;
1949                         rt->dst.input = ip6_pkt_prohibit;
1950                         break;
1951                 case RTN_THROW:
1952                 case RTN_UNREACHABLE:
1953                 default:
1954                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1955                                         : (cfg->fc_type == RTN_UNREACHABLE)
1956                                         ? -EHOSTUNREACH : -ENETUNREACH;
1957                         rt->dst.output = ip6_pkt_discard_out;
1958                         rt->dst.input = ip6_pkt_discard;
1959                         break;
1960                 }
1961                 goto install_route;
1962         }
1963
1964         if (cfg->fc_flags & RTF_GATEWAY) {
1965                 const struct in6_addr *gw_addr;
1966                 int gwa_type;
1967
1968                 gw_addr = &cfg->fc_gateway;
1969                 gwa_type = ipv6_addr_type(gw_addr);
1970
1971                 /* If gw_addr is local, we will fail to detect this in case
1972                  * the address is still TENTATIVE (DAD in progress). rt6_lookup()
1973                  * will return the already-added prefix route via the interface
1974                  * that the prefix route was assigned to, which might be non-loopback.
1975                  */
1976                 err = -EINVAL;
1977                 if (ipv6_chk_addr_and_flags(net, gw_addr,
1978                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
1979                                             dev : NULL, 0, 0))
1980                         goto out;
1981
1982                 rt->rt6i_gateway = *gw_addr;
1983
1984                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1985                         struct rt6_info *grt = NULL;
1986
1987                         /* IPv6 strictly forbids using non-link-local
1988                            addresses as nexthop addresses.
1989                            Otherwise, the router will not be able to send redirects.
1990                            This is a good rule, but in some (rare!) circumstances
1991                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1992                            some exceptions. --ANK
1993                          */
1994                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1995                                 goto out;
1996
1997                         if (cfg->fc_table) {
1998                                 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
1999
2000                                 if (grt) {
2001                                         if (grt->rt6i_flags & RTF_GATEWAY ||
2002                                             (dev && dev != grt->dst.dev)) {
2003                                                 ip6_rt_put(grt);
2004                                                 grt = NULL;
2005                                         }
2006                                 }
2007                         }
2008
2009                         if (!grt)
2010                                 grt = rt6_lookup(net, gw_addr, NULL,
2011                                                  cfg->fc_ifindex, 1);
2012
2013                         err = -EHOSTUNREACH;
2014                         if (!grt)
2015                                 goto out;
2016                         if (dev) {
2017                                 if (dev != grt->dst.dev) {
2018                                         ip6_rt_put(grt);
2019                                         goto out;
2020                                 }
2021                         } else {
2022                                 dev = grt->dst.dev;
2023                                 idev = grt->rt6i_idev;
2024                                 dev_hold(dev);
2025                                 in6_dev_hold(grt->rt6i_idev);
2026                         }
2027                         if (!(grt->rt6i_flags & RTF_GATEWAY))
2028                                 err = 0;
2029                         ip6_rt_put(grt);
2030
2031                         if (err)
2032                                 goto out;
2033                 }
2034                 err = -EINVAL;
2035                 if (!dev || (dev->flags & IFF_LOOPBACK))
2036                         goto out;
2037         }
2038
2039         err = -ENODEV;
2040         if (!dev)
2041                 goto out;
2042
2043         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2044                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2045                         err = -EINVAL;
2046                         goto out;
2047                 }
2048                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2049                 rt->rt6i_prefsrc.plen = 128;
2050         } else
2051                 rt->rt6i_prefsrc.plen = 0;
2052
2053         rt->rt6i_flags = cfg->fc_flags;
2054
2055 install_route:
2056         rt->dst.dev = dev;
2057         rt->rt6i_idev = idev;
2058         rt->rt6i_table = table;
2059
2060         cfg->fc_nlinfo.nl_net = dev_net(dev);
2061
2062         return rt;
2063 out:
2064         if (dev)
2065                 dev_put(dev);
2066         if (idev)
2067                 in6_dev_put(idev);
2068         if (rt)
2069                 dst_free(&rt->dst);
2070
2071         return ERR_PTR(err);
2072 }
2073
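/* Create a route from cfg, convert any RTA_METRICS supplied with it and
 * insert the result into the FIB via __ip6_ins_rt(). Used by the netlink
 * (inet6_rtm_newroute) and ioctl (ipv6_route_ioctl) paths as well as by the
 * RA helpers below.
 */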
2074 int ip6_route_add(struct fib6_config *cfg)
2075 {
2076         struct mx6_config mxc = { .mx = NULL, };
2077         struct rt6_info *rt;
2078         int err;
2079
2080         rt = ip6_route_info_create(cfg);
2081         if (IS_ERR(rt)) {
2082                 err = PTR_ERR(rt);
2083                 rt = NULL;
2084                 goto out;
2085         }
2086
2087         err = ip6_convert_metrics(&mxc, cfg);
2088         if (err)
2089                 goto out;
2090
2091         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2092
2093         kfree(mxc.mx);
2094
2095         return err;
2096 out:
2097         if (rt)
2098                 dst_free(&rt->dst);
2099
2100         return err;
2101 }
2102
2103 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2104 {
2105         int err;
2106         struct fib6_table *table;
2107         struct net *net = dev_net(rt->dst.dev);
2108
2109         if (rt == net->ipv6.ip6_null_entry ||
2110             rt->dst.flags & DST_NOCACHE) {
2111                 err = -ENOENT;
2112                 goto out;
2113         }
2114
2115         table = rt->rt6i_table;
2116         write_lock_bh(&table->tb6_lock);
2117         err = fib6_del(rt, info);
2118         write_unlock_bh(&table->tb6_lock);
2119
2120 out:
2121         ip6_rt_put(rt);
2122         return err;
2123 }
2124
2125 int ip6_del_rt(struct rt6_info *rt)
2126 {
2127         struct nl_info info = {
2128                 .nl_net = dev_net(rt->dst.dev),
2129         };
2130         return __ip6_del_rt(rt, &info);
2131 }
2132
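/* Delete the route in cfg's table matching the given destination/source
 * prefix and, when specified, the interface, gateway and metric. Cached
 * (RTF_CACHE) clones are skipped unless RTF_CACHE is set in the request.
 */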
2133 static int ip6_route_del(struct fib6_config *cfg)
2134 {
2135         struct fib6_table *table;
2136         struct fib6_node *fn;
2137         struct rt6_info *rt;
2138         int err = -ESRCH;
2139
2140         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2141         if (!table)
2142                 return err;
2143
2144         read_lock_bh(&table->tb6_lock);
2145
2146         fn = fib6_locate(&table->tb6_root,
2147                          &cfg->fc_dst, cfg->fc_dst_len,
2148                          &cfg->fc_src, cfg->fc_src_len);
2149
2150         if (fn) {
2151                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2152                         if ((rt->rt6i_flags & RTF_CACHE) &&
2153                             !(cfg->fc_flags & RTF_CACHE))
2154                                 continue;
2155                         if (cfg->fc_ifindex &&
2156                             (!rt->dst.dev ||
2157                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2158                                 continue;
2159                         if (cfg->fc_flags & RTF_GATEWAY &&
2160                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2161                                 continue;
2162                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2163                                 continue;
2164                         dst_hold(&rt->dst);
2165                         read_unlock_bh(&table->tb6_lock);
2166
2167                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2168                 }
2169         }
2170         read_unlock_bh(&table->tb6_lock);
2171
2172         return err;
2173 }
2174
2175 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2176 {
2177         struct netevent_redirect netevent;
2178         struct rt6_info *rt, *nrt = NULL;
2179         struct ndisc_options ndopts;
2180         struct inet6_dev *in6_dev;
2181         struct neighbour *neigh;
2182         struct rd_msg *msg;
2183         int optlen, on_link;
2184         u8 *lladdr;
2185
2186         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2187         optlen -= sizeof(*msg);
2188
2189         if (optlen < 0) {
2190                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2191                 return;
2192         }
2193
2194         msg = (struct rd_msg *)icmp6_hdr(skb);
2195
2196         if (ipv6_addr_is_multicast(&msg->dest)) {
2197                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2198                 return;
2199         }
2200
2201         on_link = 0;
2202         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2203                 on_link = 1;
2204         } else if (ipv6_addr_type(&msg->target) !=
2205                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2206                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2207                 return;
2208         }
2209
2210         in6_dev = __in6_dev_get(skb->dev);
2211         if (!in6_dev)
2212                 return;
2213         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2214                 return;
2215
2216         /* RFC2461 8.1:
2217          *      The IP source address of the Redirect MUST be the same as the current
2218          *      first-hop router for the specified ICMP Destination Address.
2219          */
2220
2221         if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2222                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2223                 return;
2224         }
2225
2226         lladdr = NULL;
2227         if (ndopts.nd_opts_tgt_lladdr) {
2228                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2229                                              skb->dev);
2230                 if (!lladdr) {
2231                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2232                         return;
2233                 }
2234         }
2235
2236         rt = (struct rt6_info *) dst;
2237         if (rt->rt6i_flags & RTF_REJECT) {
2238                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2239                 return;
2240         }
2241
2242         /* Redirect received -> path was valid.
2243          * Redirects are sent only in response to data packets,
2244          * so this nexthop is apparently reachable. --ANK
2245          */
2246         dst_confirm(&rt->dst);
2247
2248         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2249         if (!neigh)
2250                 return;
2251
2252         /*
2253          *      We have finally decided to accept it.
2254          */
2255
2256         ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2257                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2258                      NEIGH_UPDATE_F_OVERRIDE|
2259                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2260                                      NEIGH_UPDATE_F_ISROUTER)),
2261                      NDISC_REDIRECT, &ndopts);
2262
2263         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2264         if (!nrt)
2265                 goto out;
2266
2267         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2268         if (on_link)
2269                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2270
2271         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2272
2273         if (ip6_ins_rt(nrt))
2274                 goto out;
2275
2276         netevent.old = &rt->dst;
2277         netevent.new = &nrt->dst;
2278         netevent.daddr = &msg->dest;
2279         netevent.neigh = neigh;
2280         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2281
2282         if (rt->rt6i_flags & RTF_CACHE) {
2283                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2284                 ip6_del_rt(rt);
2285         }
2286
2287 out:
2288         neigh_release(neigh);
2289 }
2290
2291 /*
2292  *      Misc support functions
2293  */
2294
2295 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2296 {
2297         BUG_ON(from->dst.from);
2298
2299         rt->rt6i_flags &= ~RTF_EXPIRES;
2300         dst_hold(&from->dst);
2301         rt->dst.from = &from->dst;
2302         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2303 }
2304
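/* Initialise a clone/cache route from its parent ort: copy the input/output
 * handlers, addresses, flags and table, inherit the parent's metrics via
 * rt6_set_from(), and take references on its idev and lwtstate.
 */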
2305 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2306 {
2307         rt->dst.input = ort->dst.input;
2308         rt->dst.output = ort->dst.output;
2309         rt->rt6i_dst = ort->rt6i_dst;
2310         rt->dst.error = ort->dst.error;
2311         rt->rt6i_idev = ort->rt6i_idev;
2312         if (rt->rt6i_idev)
2313                 in6_dev_hold(rt->rt6i_idev);
2314         rt->dst.lastuse = jiffies;
2315         rt->rt6i_gateway = ort->rt6i_gateway;
2316         rt->rt6i_flags = ort->rt6i_flags;
2317         rt6_set_from(rt, ort);
2318         rt->rt6i_metric = ort->rt6i_metric;
2319 #ifdef CONFIG_IPV6_SUBTREES
2320         rt->rt6i_src = ort->rt6i_src;
2321 #endif
2322         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2323         rt->rt6i_table = ort->rt6i_table;
2324         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2325 }
2326
2327 #ifdef CONFIG_IPV6_ROUTE_INFO
2328 static struct rt6_info *rt6_get_route_info(struct net *net,
2329                                            const struct in6_addr *prefix, int prefixlen,
2330                                            const struct in6_addr *gwaddr,
2331                                            struct net_device *dev)
2332 {
2333         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2334         int ifindex = dev->ifindex;
2335         struct fib6_node *fn;
2336         struct rt6_info *rt = NULL;
2337         struct fib6_table *table;
2338
2339         table = fib6_get_table(net, tb_id);
2340         if (!table)
2341                 return NULL;
2342
2343         read_lock_bh(&table->tb6_lock);
2344         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2345         if (!fn)
2346                 goto out;
2347
2348         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2349                 if (rt->dst.dev->ifindex != ifindex)
2350                         continue;
2351                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2352                         continue;
2353                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2354                         continue;
2355                 dst_hold(&rt->dst);
2356                 break;
2357         }
2358 out:
2359         read_unlock_bh(&table->tb6_lock);
2360         return rt;
2361 }
2362
2363 static struct rt6_info *rt6_add_route_info(struct net *net,
2364                                            const struct in6_addr *prefix, int prefixlen,
2365                                            const struct in6_addr *gwaddr,
2366                                            struct net_device *dev,
2367                                            unsigned int pref)
2368 {
2369         struct fib6_config cfg = {
2370                 .fc_metric      = IP6_RT_PRIO_USER,
2371                 .fc_ifindex     = dev->ifindex,
2372                 .fc_dst_len     = prefixlen,
2373                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2374                                   RTF_UP | RTF_PREF(pref),
2375                 .fc_nlinfo.portid = 0,
2376                 .fc_nlinfo.nlh = NULL,
2377                 .fc_nlinfo.nl_net = net,
2378         };
2379
2380         cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2381         cfg.fc_dst = *prefix;
2382         cfg.fc_gateway = *gwaddr;
2383
2384         /* We should treat it as a default route if prefix length is 0. */
2385         if (!prefixlen)
2386                 cfg.fc_flags |= RTF_DEFAULT;
2387
2388         ip6_route_add(&cfg);
2389
2390         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2391 }
2392 #endif
2393
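/* Look up the RA-installed default route (RTF_ADDRCONF | RTF_DEFAULT) via
 * addr on dev in the device's default-route table; takes a reference on the
 * route if found.
 */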
2394 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2395 {
2396         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2397         struct rt6_info *rt;
2398         struct fib6_table *table;
2399
2400         table = fib6_get_table(dev_net(dev), tb_id);
2401         if (!table)
2402                 return NULL;
2403
2404         read_lock_bh(&table->tb6_lock);
2405         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2406                 if (dev == rt->dst.dev &&
2407                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2408                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2409                         break;
2410         }
2411         if (rt)
2412                 dst_hold(&rt->dst);
2413         read_unlock_bh(&table->tb6_lock);
2414         return rt;
2415 }
2416
2417 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2418                                      struct net_device *dev,
2419                                      unsigned int pref)
2420 {
2421         struct fib6_config cfg = {
2422                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2423                 .fc_metric      = IP6_RT_PRIO_USER,
2424                 .fc_ifindex     = dev->ifindex,
2425                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2426                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2427                 .fc_nlinfo.portid = 0,
2428                 .fc_nlinfo.nlh = NULL,
2429                 .fc_nlinfo.nl_net = dev_net(dev),
2430         };
2431
2432         cfg.fc_gateway = *gwaddr;
2433
2434         if (!ip6_route_add(&cfg)) {
2435                 struct fib6_table *table;
2436
2437                 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2438                 if (table)
2439                         table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2440         }
2441
2442         return rt6_get_dflt_router(gwaddr, dev);
2443 }
2444
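/* Remove RA-learned default/addrconf routes from this table, except on
 * interfaces configured with accept_ra == 2, then clear the table's
 * RT6_TABLE_HAS_DFLT_ROUTER flag.
 */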
2445 static void __rt6_purge_dflt_routers(struct fib6_table *table)
2446 {
2447         struct rt6_info *rt;
2448
2449 restart:
2450         read_lock_bh(&table->tb6_lock);
2451         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2452                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2453                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2454                         dst_hold(&rt->dst);
2455                         read_unlock_bh(&table->tb6_lock);
2456                         ip6_del_rt(rt);
2457                         goto restart;
2458                 }
2459         }
2460         read_unlock_bh(&table->tb6_lock);
2461
2462         table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2463 }
2464
2465 void rt6_purge_dflt_routers(struct net *net)
2466 {
2467         struct fib6_table *table;
2468         struct hlist_head *head;
2469         unsigned int h;
2470
2471         rcu_read_lock();
2472
2473         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2474                 head = &net->ipv6.fib_table_hash[h];
2475                 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2476                         if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2477                                 __rt6_purge_dflt_routers(table);
2478                 }
2479         }
2480
2481         rcu_read_unlock();
2482 }
2483
2484 static void rtmsg_to_fib6_config(struct net *net,
2485                                  struct in6_rtmsg *rtmsg,
2486                                  struct fib6_config *cfg)
2487 {
2488         memset(cfg, 0, sizeof(*cfg));
2489
2490         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2491                          : RT6_TABLE_MAIN;
2492         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2493         cfg->fc_metric = rtmsg->rtmsg_metric;
2494         cfg->fc_expires = rtmsg->rtmsg_info;
2495         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2496         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2497         cfg->fc_flags = rtmsg->rtmsg_flags;
2498
2499         cfg->fc_nlinfo.nl_net = net;
2500
2501         cfg->fc_dst = rtmsg->rtmsg_dst;
2502         cfg->fc_src = rtmsg->rtmsg_src;
2503         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2504 }
2505
2506 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2507 {
2508         struct fib6_config cfg;
2509         struct in6_rtmsg rtmsg;
2510         int err;
2511
2512         switch (cmd) {
2513         case SIOCADDRT:         /* Add a route */
2514         case SIOCDELRT:         /* Delete a route */
2515                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2516                         return -EPERM;
2517                 err = copy_from_user(&rtmsg, arg,
2518                                      sizeof(struct in6_rtmsg));
2519                 if (err)
2520                         return -EFAULT;
2521
2522                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2523
2524                 rtnl_lock();
2525                 switch (cmd) {
2526                 case SIOCADDRT:
2527                         err = ip6_route_add(&cfg);
2528                         break;
2529                 case SIOCDELRT:
2530                         err = ip6_route_del(&cfg);
2531                         break;
2532                 default:
2533                         err = -EINVAL;
2534                 }
2535                 rtnl_unlock();
2536
2537                 return err;
2538         }
2539
2540         return -EINVAL;
2541 }
2542
2543 /*
2544  *      Drop the packet on the floor
2545  */
2546
2547 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2548 {
2549         int type;
2550         struct dst_entry *dst = skb_dst(skb);
2551         switch (ipstats_mib_noroutes) {
2552         case IPSTATS_MIB_INNOROUTES:
2553                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2554                 if (type == IPV6_ADDR_ANY) {
2555                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2556                                       IPSTATS_MIB_INADDRERRORS);
2557                         break;
2558                 }
2559                 /* FALLTHROUGH */
2560         case IPSTATS_MIB_OUTNOROUTES:
2561                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2562                               ipstats_mib_noroutes);
2563                 break;
2564         }
2565         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2566         kfree_skb(skb);
2567         return 0;
2568 }
2569
2570 static int ip6_pkt_discard(struct sk_buff *skb)
2571 {
2572         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2573 }
2574
2575 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2576 {
2577         skb->dev = skb_dst(skb)->dev;
2578         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2579 }
2580
2581 static int ip6_pkt_prohibit(struct sk_buff *skb)
2582 {
2583         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2584 }
2585
2586 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2587 {
2588         skb->dev = skb_dst(skb)->dev;
2589         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2590 }
2591
2592 /*
2593  *      Allocate a dst for local (unicast / anycast) address.
2594  */
2595
2596 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2597                                     const struct in6_addr *addr,
2598                                     bool anycast)
2599 {
2600         u32 tb_id;
2601         struct net *net = dev_net(idev->dev);
2602         struct net_device *dev = net->loopback_dev;
2603         struct rt6_info *rt;
2604
2605         /* Use the L3 master device as loopback for host routes if the device
2606          * is enslaved and the address is not link-local or multicast.
2607          */
2608         if (!rt6_need_strict(addr))
2609                 dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2610
2611         rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2612         if (!rt)
2613                 return ERR_PTR(-ENOMEM);
2614
2615         in6_dev_hold(idev);
2616
2617         rt->dst.flags |= DST_HOST;
2618         rt->dst.input = ip6_input;
2619         rt->dst.output = ip6_output;
2620         rt->rt6i_idev = idev;
2621
2622         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2623         if (anycast)
2624                 rt->rt6i_flags |= RTF_ANYCAST;
2625         else
2626                 rt->rt6i_flags |= RTF_LOCAL;
2627
2628         rt->rt6i_gateway  = *addr;
2629         rt->rt6i_dst.addr = *addr;
2630         rt->rt6i_dst.plen = 128;
2631         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2632         rt->rt6i_table = fib6_get_table(net, tb_id);
2633         rt->dst.flags |= DST_NOCACHE;
2634
2635         atomic_set(&rt->dst.__refcnt, 1);
2636
2637         return rt;
2638 }
2639
2640 /* Remove a deleted IP address from prefsrc entries. */
2641 struct arg_dev_net_ip {
2642         struct net_device *dev;
2643         struct net *net;
2644         struct in6_addr *addr;
2645 };
2646
2647 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2648 {
2649         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2650         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2651         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2652
2653         if (((void *)rt->dst.dev == dev || !dev) &&
2654             rt != net->ipv6.ip6_null_entry &&
2655             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2656                 /* remove prefsrc entry */
2657                 rt->rt6i_prefsrc.plen = 0;
2658         }
2659         return 0;
2660 }
2661
2662 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2663 {
2664         struct net *net = dev_net(ifp->idev->dev);
2665         struct arg_dev_net_ip adni = {
2666                 .dev = ifp->idev->dev,
2667                 .net = net,
2668                 .addr = &ifp->addr,
2669         };
2670         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2671 }
2672
2673 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2674 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2675
2676 /* Remove routers and update dst entries when a gateway turns into a host. */
2677 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2678 {
2679         struct in6_addr *gateway = (struct in6_addr *)arg;
2680
2681         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2682              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2683              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2684                 return -1;
2685         }
2686         return 0;
2687 }
2688
2689 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2690 {
2691         fib6_clean_all(net, fib6_clean_tohost, gateway);
2692 }
2693
2694 struct arg_dev_net {
2695         struct net_device *dev;
2696         struct net *net;
2697 };
2698
2699 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2700 {
2701         const struct arg_dev_net *adn = arg;
2702         const struct net_device *dev = adn->dev;
2703
2704         if ((rt->dst.dev == dev || !dev) &&
2705             rt != adn->net->ipv6.ip6_null_entry)
2706                 return -1;
2707
2708         return 0;
2709 }
2710
2711 void rt6_ifdown(struct net *net, struct net_device *dev)
2712 {
2713         struct arg_dev_net adn = {
2714                 .dev = dev,
2715                 .net = net,
2716         };
2717
2718         fib6_clean_all(net, fib6_ifdown, &adn);
2719         icmp6_clean_all(fib6_ifdown, &adn);
2720         if (dev)
2721                 rt6_uncached_list_flush_dev(net, dev);
2722 }
2723
2724 struct rt6_mtu_change_arg {
2725         struct net_device *dev;
2726         unsigned int mtu;
2727 };
2728
2729 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2730 {
2731         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2732         struct inet6_dev *idev;
2733
2734         /* In IPv6, PMTU discovery is not optional,
2735            so the RTAX_MTU lock cannot disable it.
2736            We still use this lock to block changes
2737            caused by addrconf/ndisc.
2738         */
2739
2740         idev = __in6_dev_get(arg->dev);
2741         if (!idev)
2742                 return 0;
2743
2744         /* For an administrative MTU increase, there is no way to discover
2745            an IPv6 PMTU increase, so the PMTU should also be raised here.
2746            Since RFC 1981 doesn't cover administrative MTU increases,
2747            updating the PMTU on an increase is a MUST (e.g. jumbo frames).
2748          */
2749         /*
2750            If the new MTU is less than the route PMTU, the new MTU will be the
2751            lowest MTU in the path; update the route PMTU to reflect the
2752            decrease. If the new MTU is greater than the route PMTU, and the
2753            old MTU was the lowest MTU in the path, update the route PMTU
2754            to reflect the increase. In this case, if another node along the
2755            path has the lowest MTU, its TOO BIG message will trigger
2756            PMTU discovery.
2757          */
2758         if (rt->dst.dev == arg->dev &&
2759             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2760                 if (rt->rt6i_flags & RTF_CACHE) {
2761                         /* For RTF_CACHE with rt6i_pmtu == 0
2762                          * (i.e. a redirected route),
2763                          * the metrics of its rt->dst.from have already
2764                          * been updated.
2765                          */
2766                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2767                                 rt->rt6i_pmtu = arg->mtu;
2768                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2769                            (dst_mtu(&rt->dst) < arg->mtu &&
2770                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2771                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2772                 }
2773         }
2774         return 0;
2775 }
2776
2777 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2778 {
2779         struct rt6_mtu_change_arg arg = {
2780                 .dev = dev,
2781                 .mtu = mtu,
2782         };
2783
2784         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2785 }
2786
2787 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2788         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2789         [RTA_OIF]               = { .type = NLA_U32 },
2790         [RTA_IIF]               = { .type = NLA_U32 },
2791         [RTA_PRIORITY]          = { .type = NLA_U32 },
2792         [RTA_METRICS]           = { .type = NLA_NESTED },
2793         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2794         [RTA_PREF]              = { .type = NLA_U8 },
2795         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2796         [RTA_ENCAP]             = { .type = NLA_NESTED },
2797         [RTA_EXPIRES]           = { .type = NLA_U32 },
2798 };
2799
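/* Translate a route netlink message (struct rtmsg plus attributes) into a
 * fib6_config for ip6_route_add()/ip6_route_del().
 */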
2800 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2801                               struct fib6_config *cfg)
2802 {
2803         struct rtmsg *rtm;
2804         struct nlattr *tb[RTA_MAX+1];
2805         unsigned int pref;
2806         int err;
2807
2808         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2809         if (err < 0)
2810                 goto errout;
2811
2812         err = -EINVAL;
2813         rtm = nlmsg_data(nlh);
2814         memset(cfg, 0, sizeof(*cfg));
2815
2816         cfg->fc_table = rtm->rtm_table;
2817         cfg->fc_dst_len = rtm->rtm_dst_len;
2818         cfg->fc_src_len = rtm->rtm_src_len;
2819         cfg->fc_flags = RTF_UP;
2820         cfg->fc_protocol = rtm->rtm_protocol;
2821         cfg->fc_type = rtm->rtm_type;
2822
2823         if (rtm->rtm_type == RTN_UNREACHABLE ||
2824             rtm->rtm_type == RTN_BLACKHOLE ||
2825             rtm->rtm_type == RTN_PROHIBIT ||
2826             rtm->rtm_type == RTN_THROW)
2827                 cfg->fc_flags |= RTF_REJECT;
2828
2829         if (rtm->rtm_type == RTN_LOCAL)
2830                 cfg->fc_flags |= RTF_LOCAL;
2831
2832         if (rtm->rtm_flags & RTM_F_CLONED)
2833                 cfg->fc_flags |= RTF_CACHE;
2834
2835         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2836         cfg->fc_nlinfo.nlh = nlh;
2837         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2838
2839         if (tb[RTA_GATEWAY]) {
2840                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2841                 cfg->fc_flags |= RTF_GATEWAY;
2842         }
2843
2844         if (tb[RTA_DST]) {
2845                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2846
2847                 if (nla_len(tb[RTA_DST]) < plen)
2848                         goto errout;
2849
2850                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2851         }
2852
2853         if (tb[RTA_SRC]) {
2854                 int plen = (rtm->rtm_src_len + 7) >> 3;
2855
2856                 if (nla_len(tb[RTA_SRC]) < plen)
2857                         goto errout;
2858
2859                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2860         }
2861
2862         if (tb[RTA_PREFSRC])
2863                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2864
2865         if (tb[RTA_OIF])
2866                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2867
2868         if (tb[RTA_PRIORITY])
2869                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2870
2871         if (tb[RTA_METRICS]) {
2872                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2873                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2874         }
2875
2876         if (tb[RTA_TABLE])
2877                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2878
2879         if (tb[RTA_MULTIPATH]) {
2880                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2881                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2882         }
2883
2884         if (tb[RTA_PREF]) {
2885                 pref = nla_get_u8(tb[RTA_PREF]);
2886                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2887                     pref != ICMPV6_ROUTER_PREF_HIGH)
2888                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2889                 cfg->fc_flags |= RTF_PREF(pref);
2890         }
2891
2892         if (tb[RTA_ENCAP])
2893                 cfg->fc_encap = tb[RTA_ENCAP];
2894
2895         if (tb[RTA_ENCAP_TYPE])
2896                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2897
2898         if (tb[RTA_EXPIRES]) {
2899                 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2900
2901                 if (addrconf_finite_timeout(timeout)) {
2902                         cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2903                         cfg->fc_flags |= RTF_EXPIRES;
2904                 }
2905         }
2906
2907         err = 0;
2908 errout:
2909         return err;
2910 }
2911
2912 struct rt6_nh {
2913         struct rt6_info *rt6_info;
2914         struct fib6_config r_cfg;
2915         struct mx6_config mxc;
2916         struct list_head next;
2917 };
2918
2919 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2920 {
2921         struct rt6_nh *nh;
2922
2923         list_for_each_entry(nh, rt6_nh_list, next) {
2924                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2925                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2926                         nh->r_cfg.fc_ifindex);
2927         }
2928 }
2929
2930 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2931                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2932 {
2933         struct rt6_nh *nh;
2934         struct rt6_info *rtnh;
2935         int err = -EEXIST;
2936
2937         list_for_each_entry(nh, rt6_nh_list, next) {
2938                 /* check if rt6_info already exists */
2939                 rtnh = nh->rt6_info;
2940
2941                 if (rtnh->dst.dev == rt->dst.dev &&
2942                     rtnh->rt6i_idev == rt->rt6i_idev &&
2943                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2944                                     &rt->rt6i_gateway))
2945                         return err;
2946         }
2947
2948         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2949         if (!nh)
2950                 return -ENOMEM;
2951         nh->rt6_info = rt;
2952         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2953         if (err) {
2954                 kfree(nh);
2955                 return err;
2956         }
2957         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2958         list_add_tail(&nh->next, rt6_nh_list);
2959
2960         return 0;
2961 }
2962
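/* Add a multipath route (RTA_MULTIPATH), e.g. one created with roughly
 * "ip -6 route add <prefix> nexthop via A dev X nexthop via B dev Y"
 * (illustrative iproute2 syntax). Phase one builds rt6_nh_list with one
 * rt6_info per nexthop; phase two inserts them one by one, clearing
 * NLM_F_EXCL/NLM_F_REPLACE after the first, and rolls back already-inserted
 * routes if a later insertion fails.
 */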
2963 static int ip6_route_multipath_add(struct fib6_config *cfg)
2964 {
2965         struct fib6_config r_cfg;
2966         struct rtnexthop *rtnh;
2967         struct rt6_info *rt;
2968         struct rt6_nh *err_nh;
2969         struct rt6_nh *nh, *nh_safe;
2970         int remaining;
2971         int attrlen;
2972         int err = 1;
2973         int nhn = 0;
2974         int replace = (cfg->fc_nlinfo.nlh &&
2975                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2976         LIST_HEAD(rt6_nh_list);
2977
2978         remaining = cfg->fc_mp_len;
2979         rtnh = (struct rtnexthop *)cfg->fc_mp;
2980
2981         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2982          * rt6_info structs per nexthop
2983          */
2984         while (rtnh_ok(rtnh, remaining)) {
2985                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2986                 if (rtnh->rtnh_ifindex)
2987                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2988
2989                 attrlen = rtnh_attrlen(rtnh);
2990                 if (attrlen > 0) {
2991                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2992
2993                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2994                         if (nla) {
2995                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2996                                 r_cfg.fc_flags |= RTF_GATEWAY;
2997                         }
2998                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2999                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3000                         if (nla)
3001                                 r_cfg.fc_encap_type = nla_get_u16(nla);
3002                 }
3003
3004                 rt = ip6_route_info_create(&r_cfg);
3005                 if (IS_ERR(rt)) {
3006                         err = PTR_ERR(rt);
3007                         rt = NULL;
3008                         goto cleanup;
3009                 }
3010
3011                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3012                 if (err) {
3013                         dst_free(&rt->dst);
3014                         goto cleanup;
3015                 }
3016
3017                 rtnh = rtnh_next(rtnh, &remaining);
3018         }
3019
3020         err_nh = NULL;
3021         list_for_each_entry(nh, &rt6_nh_list, next) {
3022                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
3023                 /* nh->rt6_info is used or freed at this point, reset to NULL */
3024                 nh->rt6_info = NULL;
3025                 if (err) {
3026                         if (replace && nhn)
3027                                 ip6_print_replace_route_err(&rt6_nh_list);
3028                         err_nh = nh;
3029                         goto add_errout;
3030                 }
3031
3032                 /* Because each route is added like a single route, we remove
3033                  * these flags after the first nexthop: if there is a collision,
3034                  * we have already failed to add the first nexthop
3035                  * (fib6_add_rt2node() has rejected it); when replacing, the old
3036                  * nexthops have been replaced by the first new one, and the
3037                  * rest should be added to it.
3038                  */
3039                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3040                                                      NLM_F_REPLACE);
3041                 nhn++;
3042         }
3043
3044         goto cleanup;
3045
3046 add_errout:
3047         /* Delete routes that were already added */
3048         list_for_each_entry(nh, &rt6_nh_list, next) {
3049                 if (err_nh == nh)
3050                         break;
3051                 ip6_route_del(&nh->r_cfg);
3052         }
3053
3054 cleanup:
3055         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3056                 if (nh->rt6_info)
3057                         dst_free(&nh->rt6_info->dst);
3058                 kfree(nh->mxc.mx);
3059                 list_del(&nh->next);
3060                 kfree(nh);
3061         }
3062
3063         return err;
3064 }
3065
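/* Handle the RTA_MULTIPATH case of RTM_DELROUTE: each nexthop entry is
 * deleted as an individual route, and the error of the last failing
 * deletion, if any, is returned.
 */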
3066 static int ip6_route_multipath_del(struct fib6_config *cfg)
3067 {
3068         struct fib6_config r_cfg;
3069         struct rtnexthop *rtnh;
3070         int remaining;
3071         int attrlen;
3072         int err = 1, last_err = 0;
3073
3074         remaining = cfg->fc_mp_len;
3075         rtnh = (struct rtnexthop *)cfg->fc_mp;
3076
3077         /* Parse a Multipath Entry */
3078         while (rtnh_ok(rtnh, remaining)) {
3079                 memcpy(&r_cfg, cfg, sizeof(*cfg));
3080                 if (rtnh->rtnh_ifindex)
3081                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3082
3083                 attrlen = rtnh_attrlen(rtnh);
3084                 if (attrlen > 0) {
3085                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3086
3087                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3088                         if (nla) {
3089                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3090                                 r_cfg.fc_flags |= RTF_GATEWAY;
3091                         }
3092                 }
3093                 err = ip6_route_del(&r_cfg);
3094                 if (err)
3095                         last_err = err;
3096
3097                 rtnh = rtnh_next(rtnh, &remaining);
3098         }
3099
3100         return last_err;
3101 }
3102
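/* RTM_DELROUTE handler: convert the netlink request into a fib6_config and
 * dispatch to the multipath variant when RTA_MULTIPATH was supplied.
 */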
3103 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3104 {
3105         struct fib6_config cfg;
3106         int err;
3107
3108         err = rtm_to_fib6_config(skb, nlh, &cfg);
3109         if (err < 0)
3110                 return err;
3111
3112         if (cfg.fc_mp)
3113                 return ip6_route_multipath_del(&cfg);
3114         else
3115                 return ip6_route_del(&cfg);
3116 }
3117
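/* RTM_NEWROUTE handler: single-nexthop and multipath requests are dispatched
 * separately, mirroring inet6_rtm_delroute() above.
 */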
3118 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3119 {
3120         struct fib6_config cfg;
3121         int err;
3122
3123         err = rtm_to_fib6_config(skb, nlh, &cfg);
3124         if (err < 0)
3125                 return err;
3126
3127         if (cfg.fc_mp)
3128                 return ip6_route_multipath_add(&cfg);
3129         else
3130                 return ip6_route_add(&cfg);
3131 }
3132
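/* Upper bound on the size of one route notification; inet6_rt_notify() uses
 * it to size the skb.  If rt6_fill_node() still hits -EMSGSIZE, this bound
 * is wrong (see the WARN_ON in inet6_rt_notify()).
 */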
3133 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3134 {
3135         return NLMSG_ALIGN(sizeof(struct rtmsg))
3136                + nla_total_size(16) /* RTA_SRC */
3137                + nla_total_size(16) /* RTA_DST */
3138                + nla_total_size(16) /* RTA_GATEWAY */
3139                + nla_total_size(16) /* RTA_PREFSRC */
3140                + nla_total_size(4) /* RTA_TABLE */
3141                + nla_total_size(4) /* RTA_IIF */
3142                + nla_total_size(4) /* RTA_OIF */
3143                + nla_total_size(4) /* RTA_PRIORITY */
3144                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3145                + nla_total_size(sizeof(struct rta_cacheinfo))
3146                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3147                + nla_total_size(1) /* RTA_PREF */
3148                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3149 }
3150
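/* Fill one route into a netlink message.  Returns 0 on success, -EMSGSIZE
 * when the skb runs out of room, and 1 when @prefix was requested but the
 * route is not a prefix route (the entry is silently skipped).
 */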
3151 static int rt6_fill_node(struct net *net,
3152                          struct sk_buff *skb, struct rt6_info *rt,
3153                          struct in6_addr *dst, struct in6_addr *src,
3154                          int iif, int type, u32 portid, u32 seq,
3155                          int prefix, int nowait, unsigned int flags)
3156 {
3157         u32 metrics[RTAX_MAX];
3158         struct rtmsg *rtm;
3159         struct nlmsghdr *nlh;
3160         long expires;
3161         u32 table;
3162
3163         if (prefix) {   /* user wants prefix routes only */
3164                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3165                         /* success since this is not a prefix route */
3166                         return 1;
3167                 }
3168         }
3169
3170         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3171         if (!nlh)
3172                 return -EMSGSIZE;
3173
3174         rtm = nlmsg_data(nlh);
3175         rtm->rtm_family = AF_INET6;
3176         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3177         rtm->rtm_src_len = rt->rt6i_src.plen;
3178         rtm->rtm_tos = 0;
3179         if (rt->rt6i_table)
3180                 table = rt->rt6i_table->tb6_id;
3181         else
3182                 table = RT6_TABLE_UNSPEC;
3183         rtm->rtm_table = table;
3184         if (nla_put_u32(skb, RTA_TABLE, table))
3185                 goto nla_put_failure;
3186         if (rt->rt6i_flags & RTF_REJECT) {
3187                 switch (rt->dst.error) {
3188                 case -EINVAL:
3189                         rtm->rtm_type = RTN_BLACKHOLE;
3190                         break;
3191                 case -EACCES:
3192                         rtm->rtm_type = RTN_PROHIBIT;
3193                         break;
3194                 case -EAGAIN:
3195                         rtm->rtm_type = RTN_THROW;
3196                         break;
3197                 default:
3198                         rtm->rtm_type = RTN_UNREACHABLE;
3199                         break;
3200                 }
3201         } else if (rt->rt6i_flags & RTF_LOCAL)
3203                 rtm->rtm_type = RTN_LOCAL;
3204         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3205                 rtm->rtm_type = RTN_LOCAL;
3206         else
3207                 rtm->rtm_type = RTN_UNICAST;
3208         rtm->rtm_flags = 0;
3209         if (!netif_carrier_ok(rt->dst.dev)) {
3210                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3211                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3212                         rtm->rtm_flags |= RTNH_F_DEAD;
3213         }
3214         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3215         rtm->rtm_protocol = rt->rt6i_protocol;
3216         if (rt->rt6i_flags & RTF_DYNAMIC)
3217                 rtm->rtm_protocol = RTPROT_REDIRECT;
3218         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3219                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3220                         rtm->rtm_protocol = RTPROT_RA;
3221                 else
3222                         rtm->rtm_protocol = RTPROT_KERNEL;
3223         }
3224
3225         if (rt->rt6i_flags & RTF_CACHE)
3226                 rtm->rtm_flags |= RTM_F_CLONED;
3227
3228         if (dst) {
3229                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3230                         goto nla_put_failure;
3231                 rtm->rtm_dst_len = 128;
3232         } else if (rtm->rtm_dst_len)
3233                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3234                         goto nla_put_failure;
3235 #ifdef CONFIG_IPV6_SUBTREES
3236         if (src) {
3237                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3238                         goto nla_put_failure;
3239                 rtm->rtm_src_len = 128;
3240         } else if (rtm->rtm_src_len &&
3241                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3242                 goto nla_put_failure;
3243 #endif
3244         if (iif) {
3245 #ifdef CONFIG_IPV6_MROUTE
3246                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3247                         int err = ip6mr_get_route(net, skb, rtm, nowait,
3248                                                   portid);
3249
3250                         if (err <= 0) {
3251                                 if (!nowait) {
3252                                         if (err == 0)
3253                                                 return 0;
3254                                         goto nla_put_failure;
3255                                 } else {
3256                                         if (err == -EMSGSIZE)
3257                                                 goto nla_put_failure;
3258                                 }
3259                         }
3260                 } else
3261 #endif
3262                         if (nla_put_u32(skb, RTA_IIF, iif))
3263                                 goto nla_put_failure;
3264         } else if (dst) {
3265                 struct in6_addr saddr_buf;
3266                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3267                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3268                         goto nla_put_failure;
3269         }
3270
3271         if (rt->rt6i_prefsrc.plen) {
3272                 struct in6_addr saddr_buf;
3273                 saddr_buf = rt->rt6i_prefsrc.addr;
3274                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3275                         goto nla_put_failure;
3276         }
3277
3278         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3279         if (rt->rt6i_pmtu)
3280                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3281         if (rtnetlink_put_metrics(skb, metrics) < 0)
3282                 goto nla_put_failure;
3283
3284         if (rt->rt6i_flags & RTF_GATEWAY) {
3285                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3286                         goto nla_put_failure;
3287         }
3288
3289         if (rt->dst.dev &&
3290             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3291                 goto nla_put_failure;
3292         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3293                 goto nla_put_failure;
3294
3295         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3296
3297         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3298                 goto nla_put_failure;
3299
3300         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3301                 goto nla_put_failure;
3302
3303         lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3304
3305         nlmsg_end(skb, nlh);
3306         return 0;
3307
3308 nla_put_failure:
3309         nlmsg_cancel(skb, nlh);
3310         return -EMSGSIZE;
3311 }
3312
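/* Per-route callback used while dumping the FIB; honours the RTM_F_PREFIX
 * filter if the dump request carried one.
 */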
3313 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3314 {
3315         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3316         int prefix;
3317
3318         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3319                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3320                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3321         } else
3322                 prefix = 0;
3323
3324         return rt6_fill_node(arg->net,
3325                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3326                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3327                      prefix, 0, NLM_F_MULTI);
3328 }
3329
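/* RTM_GETROUTE handler: resolve a single route and unicast the result back
 * to the requester.  Illustrative trigger (assuming iproute2):
 *   ip -6 route get 2001:db8::1
 * An RTA_IIF attribute selects an input-side lookup, otherwise the output
 * path is used.
 */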
3330 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3331 {
3332         struct net *net = sock_net(in_skb->sk);
3333         struct nlattr *tb[RTA_MAX+1];
3334         struct rt6_info *rt;
3335         struct sk_buff *skb;
3336         struct rtmsg *rtm;
3337         struct flowi6 fl6;
3338         int err, iif = 0, oif = 0;
3339
3340         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3341         if (err < 0)
3342                 goto errout;
3343
3344         err = -EINVAL;
3345         memset(&fl6, 0, sizeof(fl6));
3346         rtm = nlmsg_data(nlh);
3347         fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3348
3349         if (tb[RTA_SRC]) {
3350                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3351                         goto errout;
3352
3353                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3354         }
3355
3356         if (tb[RTA_DST]) {
3357                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3358                         goto errout;
3359
3360                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3361         }
3362
3363         if (tb[RTA_IIF])
3364                 iif = nla_get_u32(tb[RTA_IIF]);
3365
3366         if (tb[RTA_OIF])
3367                 oif = nla_get_u32(tb[RTA_OIF]);
3368
3369         if (tb[RTA_MARK])
3370                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3371
3372         if (iif) {
3373                 struct net_device *dev;
3374                 int flags = 0;
3375
3376                 dev = __dev_get_by_index(net, iif);
3377                 if (!dev) {
3378                         err = -ENODEV;
3379                         goto errout;
3380                 }
3381
3382                 fl6.flowi6_iif = iif;
3383
3384                 if (!ipv6_addr_any(&fl6.saddr))
3385                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3386
3387                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3388                                                                flags);
3389         } else {
3390                 fl6.flowi6_oif = oif;
3391
3392                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3393         }
3394
3395         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3396         if (!skb) {
3397                 ip6_rt_put(rt);
3398                 err = -ENOBUFS;
3399                 goto errout;
3400         }
3401
3402         /* Reserve room for dummy headers; this skb can pass through a
3403          * good chunk of the routing engine.
3404          */
3405         skb_reset_mac_header(skb);
3406         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3407
3408         skb_dst_set(skb, &rt->dst);
3409
3410         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3411                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3412                             nlh->nlmsg_seq, 0, 0, 0);
3413         if (err < 0) {
3414                 kfree_skb(skb);
3415                 goto errout;
3416         }
3417
3418         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3419 errout:
3420         return err;
3421 }
3422
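/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group.
 */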
3423 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3424                      unsigned int nlm_flags)
3425 {
3426         struct sk_buff *skb;
3427         struct net *net = info->nl_net;
3428         u32 seq;
3429         int err;
3430
3431         err = -ENOBUFS;
3432         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3433
3434         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3435         if (!skb)
3436                 goto errout;
3437
3438         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3439                                 event, info->portid, seq, 0, 0, nlm_flags);
3440         if (err < 0) {
3441                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3442                 WARN_ON(err == -EMSGSIZE);
3443                 kfree_skb(skb);
3444                 goto errout;
3445         }
3446         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3447                     info->nlh, gfp_any());
3448         return;
3449 errout:
3450         if (err < 0)
3451                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3452 }
3453
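/* When the loopback device registers in a netns, point the template routes
 * (null and, with CONFIG_IPV6_MULTIPLE_TABLES, prohibit/blackhole) at it.
 */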
3454 static int ip6_route_dev_notify(struct notifier_block *this,
3455                                 unsigned long event, void *ptr)
3456 {
3457         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3458         struct net *net = dev_net(dev);
3459
3460         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3461                 net->ipv6.ip6_null_entry->dst.dev = dev;
3462                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3463 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3464                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3465                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3466                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3467                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3468 #endif
3469         }
3470
3471         return NOTIFY_OK;
3472 }
3473
3474 /*
3475  *      /proc
3476  */
3477
3478 #ifdef CONFIG_PROC_FS
3479
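/* File operations backing /proc/net/ipv6_route; rt6_stats_seq_fops below
 * backs /proc/net/rt6_stats.  Both entries are created per netns in
 * ip6_route_net_init_late().
 */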
3480 static const struct file_operations ipv6_route_proc_fops = {
3481         .owner          = THIS_MODULE,
3482         .open           = ipv6_route_open,
3483         .read           = seq_read,
3484         .llseek         = seq_lseek,
3485         .release        = seq_release_net,
3486 };
3487
3488 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3489 {
3490         struct net *net = (struct net *)seq->private;
3491         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3492                    net->ipv6.rt6_stats->fib_nodes,
3493                    net->ipv6.rt6_stats->fib_route_nodes,
3494                    net->ipv6.rt6_stats->fib_rt_alloc,
3495                    net->ipv6.rt6_stats->fib_rt_entries,
3496                    net->ipv6.rt6_stats->fib_rt_cache,
3497                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3498                    net->ipv6.rt6_stats->fib_discarded_routes);
3499
3500         return 0;
3501 }
3502
3503 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3504 {
3505         return single_open_net(inode, file, rt6_stats_seq_show);
3506 }
3507
3508 static const struct file_operations rt6_stats_seq_fops = {
3509         .owner   = THIS_MODULE,
3510         .open    = rt6_stats_seq_open,
3511         .read    = seq_read,
3512         .llseek  = seq_lseek,
3513         .release = single_release_net,
3514 };
3515 #endif  /* CONFIG_PROC_FS */
3516
3517 #ifdef CONFIG_SYSCTL
3518
3519 static
3520 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3521                               void __user *buffer, size_t *lenp, loff_t *ppos)
3522 {
3523         struct net *net;
3524         int delay;
3525         if (!write)
3526                 return -EINVAL;
3527
3528         net = (struct net *)ctl->extra1;
3529         delay = net->ipv6.sysctl.flush_delay;
3530         proc_dointvec(ctl, write, buffer, lenp, ppos);
3531         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3532         return 0;
3533 }
3534
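/* Template for the per-netns route sysctls; ipv6_route_sysctl_init() below
 * clones it and points each entry at the netns-local data.  Illustrative use
 * (assuming the usual /proc/sys/net/ipv6/route/ location):
 *   echo 1 > /proc/sys/net/ipv6/route/flush
 * which lands in ipv6_sysctl_rtcache_flush() above and kicks off
 * fib6_run_gc().
 */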
3535 struct ctl_table ipv6_route_table_template[] = {
3536         {
3537                 .procname       =       "flush",
3538                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3539                 .maxlen         =       sizeof(int),
3540                 .mode           =       0200,
3541                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3542         },
3543         {
3544                 .procname       =       "gc_thresh",
3545                 .data           =       &ip6_dst_ops_template.gc_thresh,
3546                 .maxlen         =       sizeof(int),
3547                 .mode           =       0644,
3548                 .proc_handler   =       proc_dointvec,
3549         },
3550         {
3551                 .procname       =       "max_size",
3552                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3553                 .maxlen         =       sizeof(int),
3554                 .mode           =       0644,
3555                 .proc_handler   =       proc_dointvec,
3556         },
3557         {
3558                 .procname       =       "gc_min_interval",
3559                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3560                 .maxlen         =       sizeof(int),
3561                 .mode           =       0644,
3562                 .proc_handler   =       proc_dointvec_jiffies,
3563         },
3564         {
3565                 .procname       =       "gc_timeout",
3566                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3567                 .maxlen         =       sizeof(int),
3568                 .mode           =       0644,
3569                 .proc_handler   =       proc_dointvec_jiffies,
3570         },
3571         {
3572                 .procname       =       "gc_interval",
3573                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3574                 .maxlen         =       sizeof(int),
3575                 .mode           =       0644,
3576                 .proc_handler   =       proc_dointvec_jiffies,
3577         },
3578         {
3579                 .procname       =       "gc_elasticity",
3580                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3581                 .maxlen         =       sizeof(int),
3582                 .mode           =       0644,
3583                 .proc_handler   =       proc_dointvec,
3584         },
3585         {
3586                 .procname       =       "mtu_expires",
3587                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3588                 .maxlen         =       sizeof(int),
3589                 .mode           =       0644,
3590                 .proc_handler   =       proc_dointvec_jiffies,
3591         },
3592         {
3593                 .procname       =       "min_adv_mss",
3594                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3595                 .maxlen         =       sizeof(int),
3596                 .mode           =       0644,
3597                 .proc_handler   =       proc_dointvec,
3598         },
3599         {
3600                 .procname       =       "gc_min_interval_ms",
3601                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3602                 .maxlen         =       sizeof(int),
3603                 .mode           =       0644,
3604                 .proc_handler   =       proc_dointvec_ms_jiffies,
3605         },
3606         { }
3607 };
3608
3609 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3610 {
3611         struct ctl_table *table;
3612
3613         table = kmemdup(ipv6_route_table_template,
3614                         sizeof(ipv6_route_table_template),
3615                         GFP_KERNEL);
3616
3617         if (table) {
3618                 table[0].data = &net->ipv6.sysctl.flush_delay;
3619                 table[0].extra1 = net;
3620                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3621                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3622                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3623                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3624                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3625                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3626                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3627                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3628                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3629
3630                 /* Don't export sysctls to unprivileged users */
3631                 if (net->user_ns != &init_user_ns)
3632                         table[0].procname = NULL;
3633         }
3634
3635         return table;
3636 }
3637 #endif
3638
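/* Per-netns setup: clone the dst_ops template, allocate the null (and, with
 * CONFIG_IPV6_MULTIPLE_TABLES, prohibit/blackhole) template routes and set
 * the sysctl defaults.  Undone by ip6_route_net_exit() below.
 */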
3639 static int __net_init ip6_route_net_init(struct net *net)
3640 {
3641         int ret = -ENOMEM;
3642
3643         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3644                sizeof(net->ipv6.ip6_dst_ops));
3645
3646         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3647                 goto out_ip6_dst_ops;
3648
3649         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3650                                            sizeof(*net->ipv6.ip6_null_entry),
3651                                            GFP_KERNEL);
3652         if (!net->ipv6.ip6_null_entry)
3653                 goto out_ip6_dst_entries;
3654         net->ipv6.ip6_null_entry->dst.path =
3655                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3656         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3657         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3658                          ip6_template_metrics, true);
3659
3660 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3661         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3662                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3663                                                GFP_KERNEL);
3664         if (!net->ipv6.ip6_prohibit_entry)
3665                 goto out_ip6_null_entry;
3666         net->ipv6.ip6_prohibit_entry->dst.path =
3667                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3668         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3669         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3670                          ip6_template_metrics, true);
3671
3672         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3673                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3674                                                GFP_KERNEL);
3675         if (!net->ipv6.ip6_blk_hole_entry)
3676                 goto out_ip6_prohibit_entry;
3677         net->ipv6.ip6_blk_hole_entry->dst.path =
3678                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3679         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3680         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3681                          ip6_template_metrics, true);
3682 #endif
3683
3684         net->ipv6.sysctl.flush_delay = 0;
3685         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3686         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3687         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3688         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3689         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3690         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3691         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3692
3693         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3694
3695         ret = 0;
3696 out:
3697         return ret;
3698
3699 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3700 out_ip6_prohibit_entry:
3701         kfree(net->ipv6.ip6_prohibit_entry);
3702 out_ip6_null_entry:
3703         kfree(net->ipv6.ip6_null_entry);
3704 #endif
3705 out_ip6_dst_entries:
3706         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3707 out_ip6_dst_ops:
3708         goto out;
3709 }
3710
3711 static void __net_exit ip6_route_net_exit(struct net *net)
3712 {
3713         kfree(net->ipv6.ip6_null_entry);
3714 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3715         kfree(net->ipv6.ip6_prohibit_entry);
3716         kfree(net->ipv6.ip6_blk_hole_entry);
3717 #endif
3718         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3719 }
3720
3721 static int __net_init ip6_route_net_init_late(struct net *net)
3722 {
3723 #ifdef CONFIG_PROC_FS
3724         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3725         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3726 #endif
3727         return 0;
3728 }
3729
3730 static void __net_exit ip6_route_net_exit_late(struct net *net)
3731 {
3732 #ifdef CONFIG_PROC_FS
3733         remove_proc_entry("ipv6_route", net->proc_net);
3734         remove_proc_entry("rt6_stats", net->proc_net);
3735 #endif
3736 }
3737
3738 static struct pernet_operations ip6_route_net_ops = {
3739         .init = ip6_route_net_init,
3740         .exit = ip6_route_net_exit,
3741 };
3742
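/* Per-netns inet_peer base (net->ipv6.peers): allocated here and
 * invalidated/freed on netns exit.
 */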
3743 static int __net_init ipv6_inetpeer_init(struct net *net)
3744 {
3745         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3746
3747         if (!bp)
3748                 return -ENOMEM;
3749         inet_peer_base_init(bp);
3750         net->ipv6.peers = bp;
3751         return 0;
3752 }
3753
3754 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3755 {
3756         struct inet_peer_base *bp = net->ipv6.peers;
3757
3758         net->ipv6.peers = NULL;
3759         inetpeer_invalidate_tree(bp);
3760         kfree(bp);
3761 }
3762
3763 static struct pernet_operations ipv6_inetpeer_ops = {
3764         .init   =       ipv6_inetpeer_init,
3765         .exit   =       ipv6_inetpeer_exit,
3766 };
3767
3768 static struct pernet_operations ip6_route_net_late_ops = {
3769         .init = ip6_route_net_init_late,
3770         .exit = ip6_route_net_exit_late,
3771 };
3772
3773 static struct notifier_block ip6_route_dev_notifier = {
3774         .notifier_call = ip6_route_dev_notify,
3775         .priority = 0,
3776 };
3777
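/* Subsystem init: create the rt6_info slab cache, register the pernet ops,
 * bring up fib6/xfrm6/fib6-rules, hook up the RTM_*ROUTE rtnetlink handlers
 * and the netdevice notifier.  Errors unwind in reverse order.
 */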
3778 int __init ip6_route_init(void)
3779 {
3780         int ret;
3781         int cpu;
3782
3783         ret = -ENOMEM;
3784         ip6_dst_ops_template.kmem_cachep =
3785                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3786                                   SLAB_HWCACHE_ALIGN, NULL);
3787         if (!ip6_dst_ops_template.kmem_cachep)
3788                 goto out;
3789
3790         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3791         if (ret)
3792                 goto out_kmem_cache;
3793
3794         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3795         if (ret)
3796                 goto out_dst_entries;
3797
3798         ret = register_pernet_subsys(&ip6_route_net_ops);
3799         if (ret)
3800                 goto out_register_inetpeer;
3801
3802         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3803
3804         /* The loopback device is registered before this code runs, so the
3805          * loopback reference in rt6_info is not taken automatically; do it
3806          * manually for init_net. */
3807         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3808         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3809 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3810         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3811         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3812         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3813         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3814 #endif
3815         ret = fib6_init();
3816         if (ret)
3817                 goto out_register_subsys;
3818
3819         ret = xfrm6_init();
3820         if (ret)
3821                 goto out_fib6_init;
3822
3823         ret = fib6_rules_init();
3824         if (ret)
3825                 goto xfrm6_init;
3826
3827         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3828         if (ret)
3829                 goto fib6_rules_init;
3830
3831         ret = -ENOBUFS;
3832         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3833             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3834             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3835                 goto out_register_late_subsys;
3836
3837         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3838         if (ret)
3839                 goto out_register_late_subsys;
3840
3841         for_each_possible_cpu(cpu) {
3842                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3843
3844                 INIT_LIST_HEAD(&ul->head);
3845                 spin_lock_init(&ul->lock);
3846         }
3847
3848 out:
3849         return ret;
3850
3851 out_register_late_subsys:
3852         unregister_pernet_subsys(&ip6_route_net_late_ops);
3853 fib6_rules_init:
3854         fib6_rules_cleanup();
3855 xfrm6_init:
3856         xfrm6_fini();
3857 out_fib6_init:
3858         fib6_gc_cleanup();
3859 out_register_subsys:
3860         unregister_pernet_subsys(&ip6_route_net_ops);
3861 out_register_inetpeer:
3862         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3863 out_dst_entries:
3864         dst_entries_destroy(&ip6_dst_blackhole_ops);
3865 out_kmem_cache:
3866         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3867         goto out;
3868 }
3869
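/* Tear everything down in (roughly) the reverse order of ip6_route_init(). */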
3870 void ip6_route_cleanup(void)
3871 {
3872         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3873         unregister_pernet_subsys(&ip6_route_net_late_ops);
3874         fib6_rules_cleanup();
3875         xfrm6_fini();
3876         fib6_gc_cleanup();
3877         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3878         unregister_pernet_subsys(&ip6_route_net_ops);
3879         dst_entries_destroy(&ip6_dst_blackhole_ops);
3880         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3881 }