Merge tag 'drivers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
[cascardo/linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Result of a next-hop reachability check.  Every failure code is
 * negative; rt6_score_route() passes negative values straight through.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is NUD_FAILED (ROUTER_PREF builds) */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores 0 and asks for round-robin */
	RT6_NUD_SUCCEED		= 1
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76                                     const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int      ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void             ip6_dst_destroy(struct dst_entry *);
82 static void             ip6_dst_ifdown(struct dst_entry *,
83                                        struct net_device *dev, int how);
84 static int               ip6_dst_gc(struct dst_ops *ops);
85
86 static int              ip6_pkt_discard(struct sk_buff *skb);
87 static int              ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88 static int              ip6_pkt_prohibit(struct sk_buff *skb);
89 static int              ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90 static void             ip6_link_failure(struct sk_buff *skb);
91 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92                                            struct sk_buff *skb, u32 mtu);
93 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94                                         struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Lookup/insert helpers used by rt6_route_rcv(). */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
#endif
106
107 static void rt6_bind_peer(struct rt6_info *rt, int create)
108 {
109         struct inet_peer_base *base;
110         struct inet_peer *peer;
111
112         base = inetpeer_base_ptr(rt->_rt6i_peer);
113         if (!base)
114                 return;
115
116         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117         if (peer) {
118                 if (!rt6_set_peer(rt, peer))
119                         inet_putpeer(peer);
120         }
121 }
122
123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124 {
125         if (rt6_has_peer(rt))
126                 return rt6_peer_ptr(rt);
127
128         rt6_bind_peer(rt, create);
129         return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130 }
131
/* Convenience wrapper: __rt6_get_peer() with the create flag set. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136
137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138 {
139         struct rt6_info *rt = (struct rt6_info *) dst;
140         struct inet_peer *peer;
141         u32 *p = NULL;
142
143         if (!(rt->dst.flags & DST_HOST))
144                 return NULL;
145
146         peer = rt6_get_peer_create(rt);
147         if (peer) {
148                 u32 *old_p = __DST_METRICS_PTR(old);
149                 unsigned long prev, new;
150
151                 p = peer->metrics;
152                 if (inet_metrics_new(peer) ||
153                     (old & DST_METRICS_FORCE_OVERWRITE))
154                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155
156                 new = (unsigned long) p;
157                 prev = cmpxchg(&dst->_metrics, old, new);
158
159                 if (prev != old) {
160                         p = __DST_METRICS_PTR(prev);
161                         if (prev & DST_METRICS_READ_ONLY)
162                                 p = NULL;
163                 }
164         }
165         return p;
166 }
167
168 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169                                              struct sk_buff *skb,
170                                              const void *daddr)
171 {
172         struct in6_addr *p = &rt->rt6i_gateway;
173
174         if (!ipv6_addr_any(p))
175                 return (const void *) p;
176         else if (skb)
177                 return &ipv6_hdr(skb)->daddr;
178         return daddr;
179 }
180
181 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182                                           struct sk_buff *skb,
183                                           const void *daddr)
184 {
185         struct rt6_info *rt = (struct rt6_info *) dst;
186         struct neighbour *n;
187
188         daddr = choose_neigh_daddr(rt, skb, daddr);
189         n = __ipv6_neigh_lookup(dst->dev, daddr);
190         if (n)
191                 return n;
192         return neigh_create(&nd_tbl, daddr, dst->dev);
193 }
194
195 static struct dst_ops ip6_dst_ops_template = {
196         .family                 =       AF_INET6,
197         .protocol               =       cpu_to_be16(ETH_P_IPV6),
198         .gc                     =       ip6_dst_gc,
199         .gc_thresh              =       1024,
200         .check                  =       ip6_dst_check,
201         .default_advmss         =       ip6_default_advmss,
202         .mtu                    =       ip6_mtu,
203         .cow_metrics            =       ipv6_cow_metrics,
204         .destroy                =       ip6_dst_destroy,
205         .ifdown                 =       ip6_dst_ifdown,
206         .negative_advice        =       ip6_negative_advice,
207         .link_failure           =       ip6_link_failure,
208         .update_pmtu            =       ip6_rt_update_pmtu,
209         .redirect               =       rt6_do_redirect,
210         .local_out              =       __ip6_local_out,
211         .neigh_lookup           =       ip6_neigh_lookup,
212 };
213
214 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
215 {
216         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
217
218         return mtu ? : dst->dev->mtu;
219 }
220
221 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
222                                          struct sk_buff *skb, u32 mtu)
223 {
224 }
225
/* dst_ops->redirect for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
230
231 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
232                                          unsigned long old)
233 {
234         return NULL;
235 }
236
237 static struct dst_ops ip6_dst_blackhole_ops = {
238         .family                 =       AF_INET6,
239         .protocol               =       cpu_to_be16(ETH_P_IPV6),
240         .destroy                =       ip6_dst_destroy,
241         .check                  =       ip6_dst_check,
242         .mtu                    =       ip6_blackhole_mtu,
243         .default_advmss         =       ip6_default_advmss,
244         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
245         .redirect               =       ip6_rt_blackhole_redirect,
246         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
247         .neigh_lookup           =       ip6_neigh_lookup,
248 };
249
250 static const u32 ip6_template_metrics[RTAX_MAX] = {
251         [RTAX_HOPLIMIT - 1] = 0,
252 };
253
254 static const struct rt6_info ip6_null_entry_template = {
255         .dst = {
256                 .__refcnt       = ATOMIC_INIT(1),
257                 .__use          = 1,
258                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
259                 .error          = -ENETUNREACH,
260                 .input          = ip6_pkt_discard,
261                 .output         = ip6_pkt_discard_out,
262         },
263         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
264         .rt6i_protocol  = RTPROT_KERNEL,
265         .rt6i_metric    = ~(u32) 0,
266         .rt6i_ref       = ATOMIC_INIT(1),
267 };
268
269 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
270
271 static const struct rt6_info ip6_prohibit_entry_template = {
272         .dst = {
273                 .__refcnt       = ATOMIC_INIT(1),
274                 .__use          = 1,
275                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
276                 .error          = -EACCES,
277                 .input          = ip6_pkt_prohibit,
278                 .output         = ip6_pkt_prohibit_out,
279         },
280         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
281         .rt6i_protocol  = RTPROT_KERNEL,
282         .rt6i_metric    = ~(u32) 0,
283         .rt6i_ref       = ATOMIC_INIT(1),
284 };
285
286 static const struct rt6_info ip6_blk_hole_entry_template = {
287         .dst = {
288                 .__refcnt       = ATOMIC_INIT(1),
289                 .__use          = 1,
290                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
291                 .error          = -EINVAL,
292                 .input          = dst_discard,
293                 .output         = dst_discard_sk,
294         },
295         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
296         .rt6i_protocol  = RTPROT_KERNEL,
297         .rt6i_metric    = ~(u32) 0,
298         .rt6i_ref       = ATOMIC_INIT(1),
299 };
300
301 #endif
302
303 /* allocate dst with ip6_dst_ops */
304 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
305                                              struct net_device *dev,
306                                              int flags,
307                                              struct fib6_table *table)
308 {
309         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
310                                         0, DST_OBSOLETE_FORCE_CHK, flags);
311
312         if (rt) {
313                 struct dst_entry *dst = &rt->dst;
314
315                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
316                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
317                 INIT_LIST_HEAD(&rt->rt6i_siblings);
318         }
319         return rt;
320 }
321
322 static void ip6_dst_destroy(struct dst_entry *dst)
323 {
324         struct rt6_info *rt = (struct rt6_info *)dst;
325         struct inet6_dev *idev = rt->rt6i_idev;
326         struct dst_entry *from = dst->from;
327
328         if (!(rt->dst.flags & DST_HOST))
329                 dst_destroy_metrics_generic(dst);
330
331         if (idev) {
332                 rt->rt6i_idev = NULL;
333                 in6_dev_put(idev);
334         }
335
336         dst->from = NULL;
337         dst_release(from);
338
339         if (rt6_has_peer(rt)) {
340                 struct inet_peer *peer = rt6_peer_ptr(rt);
341                 inet_putpeer(peer);
342         }
343 }
344
345 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346                            int how)
347 {
348         struct rt6_info *rt = (struct rt6_info *)dst;
349         struct inet6_dev *idev = rt->rt6i_idev;
350         struct net_device *loopback_dev =
351                 dev_net(dev)->loopback_dev;
352
353         if (dev != loopback_dev) {
354                 if (idev && idev->dev == dev) {
355                         struct inet6_dev *loopback_idev =
356                                 in6_dev_get(loopback_dev);
357                         if (loopback_idev) {
358                                 rt->rt6i_idev = loopback_idev;
359                                 in6_dev_put(idev);
360                         }
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 /* Multipath route selection:
377  *   Hash based function using packet header and flowlabel.
378  * Adapted from fib_info_hashfn()
379  */
380 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
381                                const struct flowi6 *fl6)
382 {
383         unsigned int val = fl6->flowi6_proto;
384
385         val ^= ipv6_addr_hash(&fl6->daddr);
386         val ^= ipv6_addr_hash(&fl6->saddr);
387
388         /* Work only if this not encapsulated */
389         switch (fl6->flowi6_proto) {
390         case IPPROTO_UDP:
391         case IPPROTO_TCP:
392         case IPPROTO_SCTP:
393                 val ^= (__force u16)fl6->fl6_sport;
394                 val ^= (__force u16)fl6->fl6_dport;
395                 break;
396
397         case IPPROTO_ICMPV6:
398                 val ^= (__force u16)fl6->fl6_icmp_type;
399                 val ^= (__force u16)fl6->fl6_icmp_code;
400                 break;
401         }
402         /* RFC6438 recommands to use flowlabel */
403         val ^= (__force u32)fl6->flowlabel;
404
405         /* Perhaps, we need to tune, this function? */
406         val = val ^ (val >> 7) ^ (val >> 12);
407         return val % candidate_count;
408 }
409
410 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
411                                              struct flowi6 *fl6, int oif,
412                                              int strict)
413 {
414         struct rt6_info *sibling, *next_sibling;
415         int route_choosen;
416
417         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
418         /* Don't change the route, if route_choosen == 0
419          * (siblings does not include ourself)
420          */
421         if (route_choosen)
422                 list_for_each_entry_safe(sibling, next_sibling,
423                                 &match->rt6i_siblings, rt6i_siblings) {
424                         route_choosen--;
425                         if (route_choosen == 0) {
426                                 if (rt6_score_route(sibling, oif, strict) < 0)
427                                         break;
428                                 match = sibling;
429                                 break;
430                         }
431                 }
432         return match;
433 }
434
435 /*
436  *      Route lookup. Any table->tb6_lock is implied.
437  */
438
439 static inline struct rt6_info *rt6_device_match(struct net *net,
440                                                     struct rt6_info *rt,
441                                                     const struct in6_addr *saddr,
442                                                     int oif,
443                                                     int flags)
444 {
445         struct rt6_info *local = NULL;
446         struct rt6_info *sprt;
447
448         if (!oif && ipv6_addr_any(saddr))
449                 goto out;
450
451         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
452                 struct net_device *dev = sprt->dst.dev;
453
454                 if (oif) {
455                         if (dev->ifindex == oif)
456                                 return sprt;
457                         if (dev->flags & IFF_LOOPBACK) {
458                                 if (!sprt->rt6i_idev ||
459                                     sprt->rt6i_idev->dev->ifindex != oif) {
460                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
461                                                 continue;
462                                         if (local && (!oif ||
463                                                       local->rt6i_idev->dev->ifindex == oif))
464                                                 continue;
465                                 }
466                                 local = sprt;
467                         }
468                 } else {
469                         if (ipv6_chk_addr(net, saddr, dev,
470                                           flags & RT6_LOOKUP_F_IFACE))
471                                 return sprt;
472                 }
473         }
474
475         if (oif) {
476                 if (local)
477                         return local;
478
479                 if (flags & RT6_LOOKUP_F_IFACE)
480                         return net->ipv6.ip6_null_entry;
481         }
482 out:
483         return rt;
484 }
485
486 #ifdef CONFIG_IPV6_ROUTER_PREF
487 struct __rt6_probe_work {
488         struct work_struct work;
489         struct in6_addr target;
490         struct net_device *dev;
491 };
492
493 static void rt6_probe_deferred(struct work_struct *w)
494 {
495         struct in6_addr mcaddr;
496         struct __rt6_probe_work *work =
497                 container_of(w, struct __rt6_probe_work, work);
498
499         addrconf_addr_solict_mult(&work->target, &mcaddr);
500         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
501         dev_put(work->dev);
502         kfree(w);
503 }
504
505 static void rt6_probe(struct rt6_info *rt)
506 {
507         struct neighbour *neigh;
508         /*
509          * Okay, this does not seem to be appropriate
510          * for now, however, we need to check if it
511          * is really so; aka Router Reachability Probing.
512          *
513          * Router Reachability Probe MUST be rate-limited
514          * to no more than one per minute.
515          */
516         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
517                 return;
518         rcu_read_lock_bh();
519         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
520         if (neigh) {
521                 write_lock(&neigh->lock);
522                 if (neigh->nud_state & NUD_VALID)
523                         goto out;
524         }
525
526         if (!neigh ||
527             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
528                 struct __rt6_probe_work *work;
529
530                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
531
532                 if (neigh && work)
533                         __neigh_set_probe_once(neigh);
534
535                 if (neigh)
536                         write_unlock(&neigh->lock);
537
538                 if (work) {
539                         INIT_WORK(&work->work, rt6_probe_deferred);
540                         work->target = rt->rt6i_gateway;
541                         dev_hold(rt->dst.dev);
542                         work->dev = rt->dst.dev;
543                         schedule_work(&work->work);
544                 }
545         } else {
546 out:
547                 write_unlock(&neigh->lock);
548         }
549         rcu_read_unlock_bh();
550 }
551 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
555 #endif
556
557 /*
558  * Default Router Selection (RFC 2461 6.3.6)
559  */
560 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
561 {
562         struct net_device *dev = rt->dst.dev;
563         if (!oif || dev->ifindex == oif)
564                 return 2;
565         if ((dev->flags & IFF_LOOPBACK) &&
566             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
567                 return 1;
568         return 0;
569 }
570
571 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
572 {
573         struct neighbour *neigh;
574         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
575
576         if (rt->rt6i_flags & RTF_NONEXTHOP ||
577             !(rt->rt6i_flags & RTF_GATEWAY))
578                 return RT6_NUD_SUCCEED;
579
580         rcu_read_lock_bh();
581         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
582         if (neigh) {
583                 read_lock(&neigh->lock);
584                 if (neigh->nud_state & NUD_VALID)
585                         ret = RT6_NUD_SUCCEED;
586 #ifdef CONFIG_IPV6_ROUTER_PREF
587                 else if (!(neigh->nud_state & NUD_FAILED))
588                         ret = RT6_NUD_SUCCEED;
589                 else
590                         ret = RT6_NUD_FAIL_PROBE;
591 #endif
592                 read_unlock(&neigh->lock);
593         } else {
594                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
595                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
596         }
597         rcu_read_unlock_bh();
598
599         return ret;
600 }
601
602 static int rt6_score_route(struct rt6_info *rt, int oif,
603                            int strict)
604 {
605         int m;
606
607         m = rt6_check_dev(rt, oif);
608         if (!m && (strict & RT6_LOOKUP_F_IFACE))
609                 return RT6_NUD_FAIL_HARD;
610 #ifdef CONFIG_IPV6_ROUTER_PREF
611         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
612 #endif
613         if (strict & RT6_LOOKUP_F_REACHABLE) {
614                 int n = rt6_check_neigh(rt);
615                 if (n < 0)
616                         return n;
617         }
618         return m;
619 }
620
621 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
622                                    int *mpri, struct rt6_info *match,
623                                    bool *do_rr)
624 {
625         int m;
626         bool match_do_rr = false;
627
628         if (rt6_check_expired(rt))
629                 goto out;
630
631         m = rt6_score_route(rt, oif, strict);
632         if (m == RT6_NUD_FAIL_DO_RR) {
633                 match_do_rr = true;
634                 m = 0; /* lowest valid score */
635         } else if (m == RT6_NUD_FAIL_HARD) {
636                 goto out;
637         }
638
639         if (strict & RT6_LOOKUP_F_REACHABLE)
640                 rt6_probe(rt);
641
642         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
643         if (m > *mpri) {
644                 *do_rr = match_do_rr;
645                 *mpri = m;
646                 match = rt;
647         }
648 out:
649         return match;
650 }
651
652 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
653                                      struct rt6_info *rr_head,
654                                      u32 metric, int oif, int strict,
655                                      bool *do_rr)
656 {
657         struct rt6_info *rt, *match;
658         int mpri = -1;
659
660         match = NULL;
661         for (rt = rr_head; rt && rt->rt6i_metric == metric;
662              rt = rt->dst.rt6_next)
663                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
664         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
665              rt = rt->dst.rt6_next)
666                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
667
668         return match;
669 }
670
671 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
672 {
673         struct rt6_info *match, *rt0;
674         struct net *net;
675         bool do_rr = false;
676
677         rt0 = fn->rr_ptr;
678         if (!rt0)
679                 fn->rr_ptr = rt0 = fn->leaf;
680
681         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
682                              &do_rr);
683
684         if (do_rr) {
685                 struct rt6_info *next = rt0->dst.rt6_next;
686
687                 /* no entries matched; do round-robin */
688                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
689                         next = fn->leaf;
690
691                 if (next != rt0)
692                         fn->rr_ptr = next;
693         }
694
695         net = dev_net(rt0->dst.dev);
696         return match ? match : net->ipv6.ip6_null_entry;
697 }
698
699 #ifdef CONFIG_IPV6_ROUTE_INFO
700 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
701                   const struct in6_addr *gwaddr)
702 {
703         struct net *net = dev_net(dev);
704         struct route_info *rinfo = (struct route_info *) opt;
705         struct in6_addr prefix_buf, *prefix;
706         unsigned int pref;
707         unsigned long lifetime;
708         struct rt6_info *rt;
709
710         if (len < sizeof(struct route_info)) {
711                 return -EINVAL;
712         }
713
714         /* Sanity check for prefix_len and length */
715         if (rinfo->length > 3) {
716                 return -EINVAL;
717         } else if (rinfo->prefix_len > 128) {
718                 return -EINVAL;
719         } else if (rinfo->prefix_len > 64) {
720                 if (rinfo->length < 2) {
721                         return -EINVAL;
722                 }
723         } else if (rinfo->prefix_len > 0) {
724                 if (rinfo->length < 1) {
725                         return -EINVAL;
726                 }
727         }
728
729         pref = rinfo->route_pref;
730         if (pref == ICMPV6_ROUTER_PREF_INVALID)
731                 return -EINVAL;
732
733         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
734
735         if (rinfo->length == 3)
736                 prefix = (struct in6_addr *)rinfo->prefix;
737         else {
738                 /* this function is safe */
739                 ipv6_addr_prefix(&prefix_buf,
740                                  (struct in6_addr *)rinfo->prefix,
741                                  rinfo->prefix_len);
742                 prefix = &prefix_buf;
743         }
744
745         if (rinfo->prefix_len == 0)
746                 rt = rt6_get_dflt_router(gwaddr, dev);
747         else
748                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
749                                         gwaddr, dev->ifindex);
750
751         if (rt && !lifetime) {
752                 ip6_del_rt(rt);
753                 rt = NULL;
754         }
755
756         if (!rt && lifetime)
757                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
758                                         pref);
759         else if (rt)
760                 rt->rt6i_flags = RTF_ROUTEINFO |
761                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
762
763         if (rt) {
764                 if (!addrconf_finite_timeout(lifetime))
765                         rt6_clean_expires(rt);
766                 else
767                         rt6_set_expires(rt, jiffies + HZ * lifetime);
768
769                 ip6_rt_put(rt);
770         }
771         return 0;
772 }
773 #endif
774
/*
 * Backtrack through the fib6 tree after a failed lookup.
 *
 * Relies on the caller's locals "rt" and "fn" and on its labels "restart"
 * and "out".  When rt is the namespace's null entry, climb from fn towards
 * the root (descending into a parent's source subtree where one exists and
 * we did not come from it).  Jumps to "restart" at the first node carrying
 * route info, or to "out" once the top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
									\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
792
793 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
794                                              struct fib6_table *table,
795                                              struct flowi6 *fl6, int flags)
796 {
797         struct fib6_node *fn;
798         struct rt6_info *rt;
799
800         read_lock_bh(&table->tb6_lock);
801         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
802 restart:
803         rt = fn->leaf;
804         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
805         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
806                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
807         BACKTRACK(net, &fl6->saddr);
808 out:
809         dst_use(&rt->dst, jiffies);
810         read_unlock_bh(&table->tb6_lock);
811         return rt;
812
813 }
814
815 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
816                                     int flags)
817 {
818         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
819 }
820 EXPORT_SYMBOL_GPL(ip6_route_lookup);
821
822 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
823                             const struct in6_addr *saddr, int oif, int strict)
824 {
825         struct flowi6 fl6 = {
826                 .flowi6_oif = oif,
827                 .daddr = *daddr,
828         };
829         struct dst_entry *dst;
830         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
831
832         if (saddr) {
833                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
834                 flags |= RT6_LOOKUP_F_HAS_SADDR;
835         }
836
837         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
838         if (dst->error == 0)
839                 return (struct rt6_info *) dst;
840
841         dst_release(dst);
842
843         return NULL;
844 }
845
846 EXPORT_SYMBOL(rt6_lookup);
847
848 /* ip6_ins_rt is called with FREE table->tb6_lock.
849    It takes new route entry, the addition fails by any reason the
850    route is freed. In any case, if caller does not hold it, it may
851    be destroyed.
852  */
853
854 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
855                         struct nlattr *mx, int mx_len)
856 {
857         int err;
858         struct fib6_table *table;
859
860         table = rt->rt6i_table;
861         write_lock_bh(&table->tb6_lock);
862         err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
863         write_unlock_bh(&table->tb6_lock);
864
865         return err;
866 }
867
868 int ip6_ins_rt(struct rt6_info *rt)
869 {
870         struct nl_info info = {
871                 .nl_net = dev_net(rt->dst.dev),
872         };
873         return __ip6_ins_rt(rt, &info, NULL, 0);
874 }
875
876 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
877                                       const struct in6_addr *daddr,
878                                       const struct in6_addr *saddr)
879 {
880         struct rt6_info *rt;
881
882         /*
883          *      Clone the route.
884          */
885
886         rt = ip6_rt_copy(ort, daddr);
887
888         if (rt) {
889                 if (ort->rt6i_dst.plen != 128 &&
890                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
891                         rt->rt6i_flags |= RTF_ANYCAST;
892
893                 rt->rt6i_flags |= RTF_CACHE;
894
895 #ifdef CONFIG_IPV6_SUBTREES
896                 if (rt->rt6i_src.plen && saddr) {
897                         rt->rt6i_src.addr = *saddr;
898                         rt->rt6i_src.plen = 128;
899                 }
900 #endif
901         }
902
903         return rt;
904 }
905
906 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
907                                         const struct in6_addr *daddr)
908 {
909         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
910
911         if (rt)
912                 rt->rt6i_flags |= RTF_CACHE;
913         return rt;
914 }
915
916 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
917                                       struct flowi6 *fl6, int flags)
918 {
919         struct fib6_node *fn;
920         struct rt6_info *rt, *nrt;
921         int strict = 0;
922         int attempts = 3;
923         int err;
924         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
925
926         strict |= flags & RT6_LOOKUP_F_IFACE;
927
928 relookup:
929         read_lock_bh(&table->tb6_lock);
930
931 restart_2:
932         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
933
934 restart:
935         rt = rt6_select(fn, oif, strict | reachable);
936         if (rt->rt6i_nsiblings)
937                 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
938         BACKTRACK(net, &fl6->saddr);
939         if (rt == net->ipv6.ip6_null_entry ||
940             rt->rt6i_flags & RTF_CACHE)
941                 goto out;
942
943         dst_hold(&rt->dst);
944         read_unlock_bh(&table->tb6_lock);
945
946         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
947                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
948         else if (!(rt->dst.flags & DST_HOST))
949                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
950         else
951                 goto out2;
952
953         ip6_rt_put(rt);
954         rt = nrt ? : net->ipv6.ip6_null_entry;
955
956         dst_hold(&rt->dst);
957         if (nrt) {
958                 err = ip6_ins_rt(nrt);
959                 if (!err)
960                         goto out2;
961         }
962
963         if (--attempts <= 0)
964                 goto out2;
965
966         /*
967          * Race condition! In the gap, when table->tb6_lock was
968          * released someone could insert this route.  Relookup.
969          */
970         ip6_rt_put(rt);
971         goto relookup;
972
973 out:
974         if (reachable) {
975                 reachable = 0;
976                 goto restart_2;
977         }
978         dst_hold(&rt->dst);
979         read_unlock_bh(&table->tb6_lock);
980 out2:
981         rt->dst.lastuse = jiffies;
982         rt->dst.__use++;
983
984         return rt;
985 }
986
987 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
988                                             struct flowi6 *fl6, int flags)
989 {
990         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
991 }
992
993 static struct dst_entry *ip6_route_input_lookup(struct net *net,
994                                                 struct net_device *dev,
995                                                 struct flowi6 *fl6, int flags)
996 {
997         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
998                 flags |= RT6_LOOKUP_F_IFACE;
999
1000         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1001 }
1002
1003 void ip6_route_input(struct sk_buff *skb)
1004 {
1005         const struct ipv6hdr *iph = ipv6_hdr(skb);
1006         struct net *net = dev_net(skb->dev);
1007         int flags = RT6_LOOKUP_F_HAS_SADDR;
1008         struct flowi6 fl6 = {
1009                 .flowi6_iif = skb->dev->ifindex,
1010                 .daddr = iph->daddr,
1011                 .saddr = iph->saddr,
1012                 .flowlabel = ip6_flowinfo(iph),
1013                 .flowi6_mark = skb->mark,
1014                 .flowi6_proto = iph->nexthdr,
1015         };
1016
1017         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1018 }
1019
1020 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1021                                              struct flowi6 *fl6, int flags)
1022 {
1023         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1024 }
1025
1026 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1027                                     struct flowi6 *fl6)
1028 {
1029         int flags = 0;
1030
1031         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1032
1033         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1034                 flags |= RT6_LOOKUP_F_IFACE;
1035
1036         if (!ipv6_addr_any(&fl6->saddr))
1037                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1038         else if (sk)
1039                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1040
1041         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1042 }
1043
1044 EXPORT_SYMBOL(ip6_route_output);
1045
1046 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1047 {
1048         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1049         struct dst_entry *new = NULL;
1050
1051         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1052         if (rt) {
1053                 new = &rt->dst;
1054
1055                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1056                 rt6_init_peer(rt, net->ipv6.peers);
1057
1058                 new->__use = 1;
1059                 new->input = dst_discard;
1060                 new->output = dst_discard_sk;
1061
1062                 if (dst_metrics_read_only(&ort->dst))
1063                         new->_metrics = ort->dst._metrics;
1064                 else
1065                         dst_copy_metrics(new, &ort->dst);
1066                 rt->rt6i_idev = ort->rt6i_idev;
1067                 if (rt->rt6i_idev)
1068                         in6_dev_hold(rt->rt6i_idev);
1069
1070                 rt->rt6i_gateway = ort->rt6i_gateway;
1071                 rt->rt6i_flags = ort->rt6i_flags;
1072                 rt->rt6i_metric = 0;
1073
1074                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1075 #ifdef CONFIG_IPV6_SUBTREES
1076                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1077 #endif
1078
1079                 dst_free(new);
1080         }
1081
1082         dst_release(dst_orig);
1083         return new ? new : ERR_PTR(-ENOMEM);
1084 }
1085
1086 /*
1087  *      Destination cache support functions
1088  */
1089
1090 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1091 {
1092         struct rt6_info *rt;
1093
1094         rt = (struct rt6_info *) dst;
1095
1096         /* All IPV6 dsts are created with ->obsolete set to the value
1097          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1098          * into this function always.
1099          */
1100         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1101                 return NULL;
1102
1103         if (rt6_check_expired(rt))
1104                 return NULL;
1105
1106         return dst;
1107 }
1108
1109 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1110 {
1111         struct rt6_info *rt = (struct rt6_info *) dst;
1112
1113         if (rt) {
1114                 if (rt->rt6i_flags & RTF_CACHE) {
1115                         if (rt6_check_expired(rt)) {
1116                                 ip6_del_rt(rt);
1117                                 dst = NULL;
1118                         }
1119                 } else {
1120                         dst_release(dst);
1121                         dst = NULL;
1122                 }
1123         }
1124         return dst;
1125 }
1126
1127 static void ip6_link_failure(struct sk_buff *skb)
1128 {
1129         struct rt6_info *rt;
1130
1131         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1132
1133         rt = (struct rt6_info *) skb_dst(skb);
1134         if (rt) {
1135                 if (rt->rt6i_flags & RTF_CACHE) {
1136                         dst_hold(&rt->dst);
1137                         if (ip6_del_rt(rt))
1138                                 dst_free(&rt->dst);
1139                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1140                         rt->rt6i_node->fn_sernum = -1;
1141                 }
1142         }
1143 }
1144
1145 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1146                                struct sk_buff *skb, u32 mtu)
1147 {
1148         struct rt6_info *rt6 = (struct rt6_info*)dst;
1149
1150         dst_confirm(dst);
1151         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1152                 struct net *net = dev_net(dst->dev);
1153
1154                 rt6->rt6i_flags |= RTF_MODIFIED;
1155                 if (mtu < IPV6_MIN_MTU) {
1156                         u32 features = dst_metric(dst, RTAX_FEATURES);
1157                         mtu = IPV6_MIN_MTU;
1158                         features |= RTAX_FEATURE_ALLFRAG;
1159                         dst_metric_set(dst, RTAX_FEATURES, features);
1160                 }
1161                 dst_metric_set(dst, RTAX_MTU, mtu);
1162                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1163         }
1164 }
1165
1166 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1167                      int oif, u32 mark)
1168 {
1169         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1170         struct dst_entry *dst;
1171         struct flowi6 fl6;
1172
1173         memset(&fl6, 0, sizeof(fl6));
1174         fl6.flowi6_oif = oif;
1175         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1176         fl6.daddr = iph->daddr;
1177         fl6.saddr = iph->saddr;
1178         fl6.flowlabel = ip6_flowinfo(iph);
1179
1180         dst = ip6_route_output(net, NULL, &fl6);
1181         if (!dst->error)
1182                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1183         dst_release(dst);
1184 }
1185 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1186
1187 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1188 {
1189         ip6_update_pmtu(skb, sock_net(sk), mtu,
1190                         sk->sk_bound_dev_if, sk->sk_mark);
1191 }
1192 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1193
1194 /* Handle redirects */
1195 struct ip6rd_flowi {
1196         struct flowi6 fl6;
1197         struct in6_addr gateway;
1198 };
1199
1200 static struct rt6_info *__ip6_route_redirect(struct net *net,
1201                                              struct fib6_table *table,
1202                                              struct flowi6 *fl6,
1203                                              int flags)
1204 {
1205         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1206         struct rt6_info *rt;
1207         struct fib6_node *fn;
1208
1209         /* Get the "current" route for this destination and
1210          * check if the redirect has come from approriate router.
1211          *
1212          * RFC 4861 specifies that redirects should only be
1213          * accepted if they come from the nexthop to the target.
1214          * Due to the way the routes are chosen, this notion
1215          * is a bit fuzzy and one might need to check all possible
1216          * routes.
1217          */
1218
1219         read_lock_bh(&table->tb6_lock);
1220         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1221 restart:
1222         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1223                 if (rt6_check_expired(rt))
1224                         continue;
1225                 if (rt->dst.error)
1226                         break;
1227                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1228                         continue;
1229                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1230                         continue;
1231                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1232                         continue;
1233                 break;
1234         }
1235
1236         if (!rt)
1237                 rt = net->ipv6.ip6_null_entry;
1238         else if (rt->dst.error) {
1239                 rt = net->ipv6.ip6_null_entry;
1240                 goto out;
1241         }
1242         BACKTRACK(net, &fl6->saddr);
1243 out:
1244         dst_hold(&rt->dst);
1245
1246         read_unlock_bh(&table->tb6_lock);
1247
1248         return rt;
1249 };
1250
1251 static struct dst_entry *ip6_route_redirect(struct net *net,
1252                                         const struct flowi6 *fl6,
1253                                         const struct in6_addr *gateway)
1254 {
1255         int flags = RT6_LOOKUP_F_HAS_SADDR;
1256         struct ip6rd_flowi rdfl;
1257
1258         rdfl.fl6 = *fl6;
1259         rdfl.gateway = *gateway;
1260
1261         return fib6_rule_lookup(net, &rdfl.fl6,
1262                                 flags, __ip6_route_redirect);
1263 }
1264
1265 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1266 {
1267         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1268         struct dst_entry *dst;
1269         struct flowi6 fl6;
1270
1271         memset(&fl6, 0, sizeof(fl6));
1272         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1273         fl6.flowi6_oif = oif;
1274         fl6.flowi6_mark = mark;
1275         fl6.daddr = iph->daddr;
1276         fl6.saddr = iph->saddr;
1277         fl6.flowlabel = ip6_flowinfo(iph);
1278
1279         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1280         rt6_do_redirect(dst, NULL, skb);
1281         dst_release(dst);
1282 }
1283 EXPORT_SYMBOL_GPL(ip6_redirect);
1284
1285 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1286                             u32 mark)
1287 {
1288         const struct ipv6hdr *iph = ipv6_hdr(skb);
1289         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1290         struct dst_entry *dst;
1291         struct flowi6 fl6;
1292
1293         memset(&fl6, 0, sizeof(fl6));
1294         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1295         fl6.flowi6_oif = oif;
1296         fl6.flowi6_mark = mark;
1297         fl6.daddr = msg->dest;
1298         fl6.saddr = iph->daddr;
1299
1300         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1301         rt6_do_redirect(dst, NULL, skb);
1302         dst_release(dst);
1303 }
1304
1305 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1306 {
1307         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1308 }
1309 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1310
1311 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1312 {
1313         struct net_device *dev = dst->dev;
1314         unsigned int mtu = dst_mtu(dst);
1315         struct net *net = dev_net(dev);
1316
1317         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1318
1319         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1320                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1321
1322         /*
1323          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1324          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1325          * IPV6_MAXPLEN is also valid and means: "any MSS,
1326          * rely only on pmtu discovery"
1327          */
1328         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1329                 mtu = IPV6_MAXPLEN;
1330         return mtu;
1331 }
1332
1333 static unsigned int ip6_mtu(const struct dst_entry *dst)
1334 {
1335         struct inet6_dev *idev;
1336         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1337
1338         if (mtu)
1339                 goto out;
1340
1341         mtu = IPV6_MIN_MTU;
1342
1343         rcu_read_lock();
1344         idev = __in6_dev_get(dst->dev);
1345         if (idev)
1346                 mtu = idev->cnf.mtu6;
1347         rcu_read_unlock();
1348
1349 out:
1350         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1351 }
1352
1353 static struct dst_entry *icmp6_dst_gc_list;
1354 static DEFINE_SPINLOCK(icmp6_dst_lock);
1355
1356 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1357                                   struct flowi6 *fl6)
1358 {
1359         struct dst_entry *dst;
1360         struct rt6_info *rt;
1361         struct inet6_dev *idev = in6_dev_get(dev);
1362         struct net *net = dev_net(dev);
1363
1364         if (unlikely(!idev))
1365                 return ERR_PTR(-ENODEV);
1366
1367         rt = ip6_dst_alloc(net, dev, 0, NULL);
1368         if (unlikely(!rt)) {
1369                 in6_dev_put(idev);
1370                 dst = ERR_PTR(-ENOMEM);
1371                 goto out;
1372         }
1373
1374         rt->dst.flags |= DST_HOST;
1375         rt->dst.output  = ip6_output;
1376         atomic_set(&rt->dst.__refcnt, 1);
1377         rt->rt6i_gateway  = fl6->daddr;
1378         rt->rt6i_dst.addr = fl6->daddr;
1379         rt->rt6i_dst.plen = 128;
1380         rt->rt6i_idev     = idev;
1381         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1382
1383         spin_lock_bh(&icmp6_dst_lock);
1384         rt->dst.next = icmp6_dst_gc_list;
1385         icmp6_dst_gc_list = &rt->dst;
1386         spin_unlock_bh(&icmp6_dst_lock);
1387
1388         fib6_force_start_gc(net);
1389
1390         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1391
1392 out:
1393         return dst;
1394 }
1395
1396 int icmp6_dst_gc(void)
1397 {
1398         struct dst_entry *dst, **pprev;
1399         int more = 0;
1400
1401         spin_lock_bh(&icmp6_dst_lock);
1402         pprev = &icmp6_dst_gc_list;
1403
1404         while ((dst = *pprev) != NULL) {
1405                 if (!atomic_read(&dst->__refcnt)) {
1406                         *pprev = dst->next;
1407                         dst_free(dst);
1408                 } else {
1409                         pprev = &dst->next;
1410                         ++more;
1411                 }
1412         }
1413
1414         spin_unlock_bh(&icmp6_dst_lock);
1415
1416         return more;
1417 }
1418
1419 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1420                             void *arg)
1421 {
1422         struct dst_entry *dst, **pprev;
1423
1424         spin_lock_bh(&icmp6_dst_lock);
1425         pprev = &icmp6_dst_gc_list;
1426         while ((dst = *pprev) != NULL) {
1427                 struct rt6_info *rt = (struct rt6_info *) dst;
1428                 if (func(rt, arg)) {
1429                         *pprev = dst->next;
1430                         dst_free(dst);
1431                 } else {
1432                         pprev = &dst->next;
1433                 }
1434         }
1435         spin_unlock_bh(&icmp6_dst_lock);
1436 }
1437
1438 static int ip6_dst_gc(struct dst_ops *ops)
1439 {
1440         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1441         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1442         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1443         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1444         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1445         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1446         int entries;
1447
1448         entries = dst_entries_get_fast(ops);
1449         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1450             entries <= rt_max_size)
1451                 goto out;
1452
1453         net->ipv6.ip6_rt_gc_expire++;
1454         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1455         entries = dst_entries_get_slow(ops);
1456         if (entries < ops->gc_thresh)
1457                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1458 out:
1459         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1460         return entries > rt_max_size;
1461 }
1462
1463 /*
1464  *
1465  */
1466
1467 int ip6_route_add(struct fib6_config *cfg)
1468 {
1469         int err;
1470         struct net *net = cfg->fc_nlinfo.nl_net;
1471         struct rt6_info *rt = NULL;
1472         struct net_device *dev = NULL;
1473         struct inet6_dev *idev = NULL;
1474         struct fib6_table *table;
1475         int addr_type;
1476
1477         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1478                 return -EINVAL;
1479 #ifndef CONFIG_IPV6_SUBTREES
1480         if (cfg->fc_src_len)
1481                 return -EINVAL;
1482 #endif
1483         if (cfg->fc_ifindex) {
1484                 err = -ENODEV;
1485                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1486                 if (!dev)
1487                         goto out;
1488                 idev = in6_dev_get(dev);
1489                 if (!idev)
1490                         goto out;
1491         }
1492
1493         if (cfg->fc_metric == 0)
1494                 cfg->fc_metric = IP6_RT_PRIO_USER;
1495
1496         err = -ENOBUFS;
1497         if (cfg->fc_nlinfo.nlh &&
1498             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1499                 table = fib6_get_table(net, cfg->fc_table);
1500                 if (!table) {
1501                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1502                         table = fib6_new_table(net, cfg->fc_table);
1503                 }
1504         } else {
1505                 table = fib6_new_table(net, cfg->fc_table);
1506         }
1507
1508         if (!table)
1509                 goto out;
1510
1511         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1512
1513         if (!rt) {
1514                 err = -ENOMEM;
1515                 goto out;
1516         }
1517
1518         if (cfg->fc_flags & RTF_EXPIRES)
1519                 rt6_set_expires(rt, jiffies +
1520                                 clock_t_to_jiffies(cfg->fc_expires));
1521         else
1522                 rt6_clean_expires(rt);
1523
1524         if (cfg->fc_protocol == RTPROT_UNSPEC)
1525                 cfg->fc_protocol = RTPROT_BOOT;
1526         rt->rt6i_protocol = cfg->fc_protocol;
1527
1528         addr_type = ipv6_addr_type(&cfg->fc_dst);
1529
1530         if (addr_type & IPV6_ADDR_MULTICAST)
1531                 rt->dst.input = ip6_mc_input;
1532         else if (cfg->fc_flags & RTF_LOCAL)
1533                 rt->dst.input = ip6_input;
1534         else
1535                 rt->dst.input = ip6_forward;
1536
1537         rt->dst.output = ip6_output;
1538
1539         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1540         rt->rt6i_dst.plen = cfg->fc_dst_len;
1541         if (rt->rt6i_dst.plen == 128) {
1542                 rt->dst.flags |= DST_HOST;
1543                 dst_metrics_set_force_overwrite(&rt->dst);
1544         }
1545
1546 #ifdef CONFIG_IPV6_SUBTREES
1547         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1548         rt->rt6i_src.plen = cfg->fc_src_len;
1549 #endif
1550
1551         rt->rt6i_metric = cfg->fc_metric;
1552
1553         /* We cannot add true routes via loopback here,
1554            they would result in kernel looping; promote them to reject routes
1555          */
1556         if ((cfg->fc_flags & RTF_REJECT) ||
1557             (dev && (dev->flags & IFF_LOOPBACK) &&
1558              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1559              !(cfg->fc_flags & RTF_LOCAL))) {
1560                 /* hold loopback dev/idev if we haven't done so. */
1561                 if (dev != net->loopback_dev) {
1562                         if (dev) {
1563                                 dev_put(dev);
1564                                 in6_dev_put(idev);
1565                         }
1566                         dev = net->loopback_dev;
1567                         dev_hold(dev);
1568                         idev = in6_dev_get(dev);
1569                         if (!idev) {
1570                                 err = -ENODEV;
1571                                 goto out;
1572                         }
1573                 }
1574                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1575                 switch (cfg->fc_type) {
1576                 case RTN_BLACKHOLE:
1577                         rt->dst.error = -EINVAL;
1578                         rt->dst.output = dst_discard_sk;
1579                         rt->dst.input = dst_discard;
1580                         break;
1581                 case RTN_PROHIBIT:
1582                         rt->dst.error = -EACCES;
1583                         rt->dst.output = ip6_pkt_prohibit_out;
1584                         rt->dst.input = ip6_pkt_prohibit;
1585                         break;
1586                 case RTN_THROW:
1587                 default:
1588                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1589                                         : -ENETUNREACH;
1590                         rt->dst.output = ip6_pkt_discard_out;
1591                         rt->dst.input = ip6_pkt_discard;
1592                         break;
1593                 }
1594                 goto install_route;
1595         }
1596
1597         if (cfg->fc_flags & RTF_GATEWAY) {
1598                 const struct in6_addr *gw_addr;
1599                 int gwa_type;
1600
1601                 gw_addr = &cfg->fc_gateway;
1602                 rt->rt6i_gateway = *gw_addr;
1603                 gwa_type = ipv6_addr_type(gw_addr);
1604
1605                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1606                         struct rt6_info *grt;
1607
1608                         /* IPv6 strictly inhibits using not link-local
1609                            addresses as nexthop address.
1610                            Otherwise, router will not able to send redirects.
1611                            It is very good, but in some (rare!) circumstances
1612                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1613                            some exceptions. --ANK
1614                          */
1615                         err = -EINVAL;
1616                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1617                                 goto out;
1618
1619                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1620
1621                         err = -EHOSTUNREACH;
1622                         if (!grt)
1623                                 goto out;
1624                         if (dev) {
1625                                 if (dev != grt->dst.dev) {
1626                                         ip6_rt_put(grt);
1627                                         goto out;
1628                                 }
1629                         } else {
1630                                 dev = grt->dst.dev;
1631                                 idev = grt->rt6i_idev;
1632                                 dev_hold(dev);
1633                                 in6_dev_hold(grt->rt6i_idev);
1634                         }
1635                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1636                                 err = 0;
1637                         ip6_rt_put(grt);
1638
1639                         if (err)
1640                                 goto out;
1641                 }
1642                 err = -EINVAL;
1643                 if (!dev || (dev->flags & IFF_LOOPBACK))
1644                         goto out;
1645         }
1646
1647         err = -ENODEV;
1648         if (!dev)
1649                 goto out;
1650
1651         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1652                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1653                         err = -EINVAL;
1654                         goto out;
1655                 }
1656                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1657                 rt->rt6i_prefsrc.plen = 128;
1658         } else
1659                 rt->rt6i_prefsrc.plen = 0;
1660
1661         rt->rt6i_flags = cfg->fc_flags;
1662
1663 install_route:
1664         rt->dst.dev = dev;
1665         rt->rt6i_idev = idev;
1666         rt->rt6i_table = table;
1667
1668         cfg->fc_nlinfo.nl_net = dev_net(dev);
1669
1670         return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
1671
1672 out:
1673         if (dev)
1674                 dev_put(dev);
1675         if (idev)
1676                 in6_dev_put(idev);
1677         if (rt)
1678                 dst_free(&rt->dst);
1679         return err;
1680 }
1681
1682 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1683 {
1684         int err;
1685         struct fib6_table *table;
1686         struct net *net = dev_net(rt->dst.dev);
1687
1688         if (rt == net->ipv6.ip6_null_entry) {
1689                 err = -ENOENT;
1690                 goto out;
1691         }
1692
1693         table = rt->rt6i_table;
1694         write_lock_bh(&table->tb6_lock);
1695         err = fib6_del(rt, info);
1696         write_unlock_bh(&table->tb6_lock);
1697
1698 out:
1699         ip6_rt_put(rt);
1700         return err;
1701 }
1702
1703 int ip6_del_rt(struct rt6_info *rt)
1704 {
1705         struct nl_info info = {
1706                 .nl_net = dev_net(rt->dst.dev),
1707         };
1708         return __ip6_del_rt(rt, &info);
1709 }
1710
1711 static int ip6_route_del(struct fib6_config *cfg)
1712 {
1713         struct fib6_table *table;
1714         struct fib6_node *fn;
1715         struct rt6_info *rt;
1716         int err = -ESRCH;
1717
1718         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1719         if (!table)
1720                 return err;
1721
1722         read_lock_bh(&table->tb6_lock);
1723
1724         fn = fib6_locate(&table->tb6_root,
1725                          &cfg->fc_dst, cfg->fc_dst_len,
1726                          &cfg->fc_src, cfg->fc_src_len);
1727
1728         if (fn) {
1729                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1730                         if (cfg->fc_ifindex &&
1731                             (!rt->dst.dev ||
1732                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1733                                 continue;
1734                         if (cfg->fc_flags & RTF_GATEWAY &&
1735                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1736                                 continue;
1737                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1738                                 continue;
1739                         dst_hold(&rt->dst);
1740                         read_unlock_bh(&table->tb6_lock);
1741
1742                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1743                 }
1744         }
1745         read_unlock_bh(&table->tb6_lock);
1746
1747         return err;
1748 }
1749
1750 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1751 {
1752         struct net *net = dev_net(skb->dev);
1753         struct netevent_redirect netevent;
1754         struct rt6_info *rt, *nrt = NULL;
1755         struct ndisc_options ndopts;
1756         struct inet6_dev *in6_dev;
1757         struct neighbour *neigh;
1758         struct rd_msg *msg;
1759         int optlen, on_link;
1760         u8 *lladdr;
1761
1762         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1763         optlen -= sizeof(*msg);
1764
1765         if (optlen < 0) {
1766                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1767                 return;
1768         }
1769
1770         msg = (struct rd_msg *)icmp6_hdr(skb);
1771
1772         if (ipv6_addr_is_multicast(&msg->dest)) {
1773                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1774                 return;
1775         }
1776
1777         on_link = 0;
1778         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1779                 on_link = 1;
1780         } else if (ipv6_addr_type(&msg->target) !=
1781                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1782                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1783                 return;
1784         }
1785
1786         in6_dev = __in6_dev_get(skb->dev);
1787         if (!in6_dev)
1788                 return;
1789         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1790                 return;
1791
1792         /* RFC2461 8.1:
1793          *      The IP source address of the Redirect MUST be the same as the current
1794          *      first-hop router for the specified ICMP Destination Address.
1795          */
1796
1797         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1798                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1799                 return;
1800         }
1801
1802         lladdr = NULL;
1803         if (ndopts.nd_opts_tgt_lladdr) {
1804                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1805                                              skb->dev);
1806                 if (!lladdr) {
1807                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1808                         return;
1809                 }
1810         }
1811
1812         rt = (struct rt6_info *) dst;
1813         if (rt == net->ipv6.ip6_null_entry) {
1814                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1815                 return;
1816         }
1817
1818         /* Redirect received -> path was valid.
1819          * Look, redirects are sent only in response to data packets,
1820          * so that this nexthop apparently is reachable. --ANK
1821          */
1822         dst_confirm(&rt->dst);
1823
1824         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1825         if (!neigh)
1826                 return;
1827
1828         /*
1829          *      We have finally decided to accept it.
1830          */
1831
1832         neigh_update(neigh, lladdr, NUD_STALE,
1833                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1834                      NEIGH_UPDATE_F_OVERRIDE|
1835                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1836                                      NEIGH_UPDATE_F_ISROUTER))
1837                      );
1838
1839         nrt = ip6_rt_copy(rt, &msg->dest);
1840         if (!nrt)
1841                 goto out;
1842
1843         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1844         if (on_link)
1845                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1846
1847         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1848
1849         if (ip6_ins_rt(nrt))
1850                 goto out;
1851
1852         netevent.old = &rt->dst;
1853         netevent.new = &nrt->dst;
1854         netevent.daddr = &msg->dest;
1855         netevent.neigh = neigh;
1856         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1857
1858         if (rt->rt6i_flags & RTF_CACHE) {
1859                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1860                 ip6_del_rt(rt);
1861         }
1862
1863 out:
1864         neigh_release(neigh);
1865 }
1866
1867 /*
1868  *      Misc support functions
1869  */
1870
1871 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1872                                     const struct in6_addr *dest)
1873 {
1874         struct net *net = dev_net(ort->dst.dev);
1875         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1876                                             ort->rt6i_table);
1877
1878         if (rt) {
1879                 rt->dst.input = ort->dst.input;
1880                 rt->dst.output = ort->dst.output;
1881                 rt->dst.flags |= DST_HOST;
1882
1883                 rt->rt6i_dst.addr = *dest;
1884                 rt->rt6i_dst.plen = 128;
1885                 dst_copy_metrics(&rt->dst, &ort->dst);
1886                 rt->dst.error = ort->dst.error;
1887                 rt->rt6i_idev = ort->rt6i_idev;
1888                 if (rt->rt6i_idev)
1889                         in6_dev_hold(rt->rt6i_idev);
1890                 rt->dst.lastuse = jiffies;
1891
1892                 if (ort->rt6i_flags & RTF_GATEWAY)
1893                         rt->rt6i_gateway = ort->rt6i_gateway;
1894                 else
1895                         rt->rt6i_gateway = *dest;
1896                 rt->rt6i_flags = ort->rt6i_flags;
1897                 rt6_set_from(rt, ort);
1898                 rt->rt6i_metric = 0;
1899
1900 #ifdef CONFIG_IPV6_SUBTREES
1901                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1902 #endif
1903                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1904                 rt->rt6i_table = ort->rt6i_table;
1905         }
1906         return rt;
1907 }
1908
1909 #ifdef CONFIG_IPV6_ROUTE_INFO
1910 static struct rt6_info *rt6_get_route_info(struct net *net,
1911                                            const struct in6_addr *prefix, int prefixlen,
1912                                            const struct in6_addr *gwaddr, int ifindex)
1913 {
1914         struct fib6_node *fn;
1915         struct rt6_info *rt = NULL;
1916         struct fib6_table *table;
1917
1918         table = fib6_get_table(net, RT6_TABLE_INFO);
1919         if (!table)
1920                 return NULL;
1921
1922         read_lock_bh(&table->tb6_lock);
1923         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1924         if (!fn)
1925                 goto out;
1926
1927         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1928                 if (rt->dst.dev->ifindex != ifindex)
1929                         continue;
1930                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1931                         continue;
1932                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1933                         continue;
1934                 dst_hold(&rt->dst);
1935                 break;
1936         }
1937 out:
1938         read_unlock_bh(&table->tb6_lock);
1939         return rt;
1940 }
1941
1942 static struct rt6_info *rt6_add_route_info(struct net *net,
1943                                            const struct in6_addr *prefix, int prefixlen,
1944                                            const struct in6_addr *gwaddr, int ifindex,
1945                                            unsigned int pref)
1946 {
1947         struct fib6_config cfg = {
1948                 .fc_table       = RT6_TABLE_INFO,
1949                 .fc_metric      = IP6_RT_PRIO_USER,
1950                 .fc_ifindex     = ifindex,
1951                 .fc_dst_len     = prefixlen,
1952                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1953                                   RTF_UP | RTF_PREF(pref),
1954                 .fc_nlinfo.portid = 0,
1955                 .fc_nlinfo.nlh = NULL,
1956                 .fc_nlinfo.nl_net = net,
1957         };
1958
1959         cfg.fc_dst = *prefix;
1960         cfg.fc_gateway = *gwaddr;
1961
1962         /* We should treat it as a default route if prefix length is 0. */
1963         if (!prefixlen)
1964                 cfg.fc_flags |= RTF_DEFAULT;
1965
1966         ip6_route_add(&cfg);
1967
1968         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1969 }
1970 #endif
1971
1972 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1973 {
1974         struct rt6_info *rt;
1975         struct fib6_table *table;
1976
1977         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1978         if (!table)
1979                 return NULL;
1980
1981         read_lock_bh(&table->tb6_lock);
1982         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1983                 if (dev == rt->dst.dev &&
1984                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1985                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1986                         break;
1987         }
1988         if (rt)
1989                 dst_hold(&rt->dst);
1990         read_unlock_bh(&table->tb6_lock);
1991         return rt;
1992 }
1993
1994 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1995                                      struct net_device *dev,
1996                                      unsigned int pref)
1997 {
1998         struct fib6_config cfg = {
1999                 .fc_table       = RT6_TABLE_DFLT,
2000                 .fc_metric      = IP6_RT_PRIO_USER,
2001                 .fc_ifindex     = dev->ifindex,
2002                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2003                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2004                 .fc_nlinfo.portid = 0,
2005                 .fc_nlinfo.nlh = NULL,
2006                 .fc_nlinfo.nl_net = dev_net(dev),
2007         };
2008
2009         cfg.fc_gateway = *gwaddr;
2010
2011         ip6_route_add(&cfg);
2012
2013         return rt6_get_dflt_router(gwaddr, dev);
2014 }
2015
2016 void rt6_purge_dflt_routers(struct net *net)
2017 {
2018         struct rt6_info *rt;
2019         struct fib6_table *table;
2020
2021         /* NOTE: Keep consistent with rt6_get_dflt_router */
2022         table = fib6_get_table(net, RT6_TABLE_DFLT);
2023         if (!table)
2024                 return;
2025
2026 restart:
2027         read_lock_bh(&table->tb6_lock);
2028         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2029                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2030                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2031                         dst_hold(&rt->dst);
2032                         read_unlock_bh(&table->tb6_lock);
2033                         ip6_del_rt(rt);
2034                         goto restart;
2035                 }
2036         }
2037         read_unlock_bh(&table->tb6_lock);
2038 }
2039
2040 static void rtmsg_to_fib6_config(struct net *net,
2041                                  struct in6_rtmsg *rtmsg,
2042                                  struct fib6_config *cfg)
2043 {
2044         memset(cfg, 0, sizeof(*cfg));
2045
2046         cfg->fc_table = RT6_TABLE_MAIN;
2047         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2048         cfg->fc_metric = rtmsg->rtmsg_metric;
2049         cfg->fc_expires = rtmsg->rtmsg_info;
2050         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2051         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2052         cfg->fc_flags = rtmsg->rtmsg_flags;
2053
2054         cfg->fc_nlinfo.nl_net = net;
2055
2056         cfg->fc_dst = rtmsg->rtmsg_dst;
2057         cfg->fc_src = rtmsg->rtmsg_src;
2058         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2059 }
2060
2061 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2062 {
2063         struct fib6_config cfg;
2064         struct in6_rtmsg rtmsg;
2065         int err;
2066
2067         switch(cmd) {
2068         case SIOCADDRT:         /* Add a route */
2069         case SIOCDELRT:         /* Delete a route */
2070                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2071                         return -EPERM;
2072                 err = copy_from_user(&rtmsg, arg,
2073                                      sizeof(struct in6_rtmsg));
2074                 if (err)
2075                         return -EFAULT;
2076
2077                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2078
2079                 rtnl_lock();
2080                 switch (cmd) {
2081                 case SIOCADDRT:
2082                         err = ip6_route_add(&cfg);
2083                         break;
2084                 case SIOCDELRT:
2085                         err = ip6_route_del(&cfg);
2086                         break;
2087                 default:
2088                         err = -EINVAL;
2089                 }
2090                 rtnl_unlock();
2091
2092                 return err;
2093         }
2094
2095         return -EINVAL;
2096 }
2097
2098 /*
2099  *      Drop the packet on the floor
2100  */
2101
2102 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2103 {
2104         int type;
2105         struct dst_entry *dst = skb_dst(skb);
2106         switch (ipstats_mib_noroutes) {
2107         case IPSTATS_MIB_INNOROUTES:
2108                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2109                 if (type == IPV6_ADDR_ANY) {
2110                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2111                                       IPSTATS_MIB_INADDRERRORS);
2112                         break;
2113                 }
2114                 /* FALLTHROUGH */
2115         case IPSTATS_MIB_OUTNOROUTES:
2116                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2117                               ipstats_mib_noroutes);
2118                 break;
2119         }
2120         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2121         kfree_skb(skb);
2122         return 0;
2123 }
2124
2125 static int ip6_pkt_discard(struct sk_buff *skb)
2126 {
2127         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2128 }
2129
2130 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
2131 {
2132         skb->dev = skb_dst(skb)->dev;
2133         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2134 }
2135
2136 static int ip6_pkt_prohibit(struct sk_buff *skb)
2137 {
2138         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2139 }
2140
2141 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
2142 {
2143         skb->dev = skb_dst(skb)->dev;
2144         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2145 }
2146
2147 /*
2148  *      Allocate a dst for local (unicast / anycast) address.
2149  */
2150
2151 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2152                                     const struct in6_addr *addr,
2153                                     bool anycast)
2154 {
2155         struct net *net = dev_net(idev->dev);
2156         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2157                                             DST_NOCOUNT, NULL);
2158         if (!rt)
2159                 return ERR_PTR(-ENOMEM);
2160
2161         in6_dev_hold(idev);
2162
2163         rt->dst.flags |= DST_HOST;
2164         rt->dst.input = ip6_input;
2165         rt->dst.output = ip6_output;
2166         rt->rt6i_idev = idev;
2167
2168         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2169         if (anycast)
2170                 rt->rt6i_flags |= RTF_ANYCAST;
2171         else
2172                 rt->rt6i_flags |= RTF_LOCAL;
2173
2174         rt->rt6i_gateway  = *addr;
2175         rt->rt6i_dst.addr = *addr;
2176         rt->rt6i_dst.plen = 128;
2177         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2178
2179         atomic_set(&rt->dst.__refcnt, 1);
2180
2181         return rt;
2182 }
2183
2184 int ip6_route_get_saddr(struct net *net,
2185                         struct rt6_info *rt,
2186                         const struct in6_addr *daddr,
2187                         unsigned int prefs,
2188                         struct in6_addr *saddr)
2189 {
2190         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2191         int err = 0;
2192         if (rt->rt6i_prefsrc.plen)
2193                 *saddr = rt->rt6i_prefsrc.addr;
2194         else
2195                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2196                                          daddr, prefs, saddr);
2197         return err;
2198 }
2199
2200 /* remove deleted ip from prefsrc entries */
2201 struct arg_dev_net_ip {
2202         struct net_device *dev;
2203         struct net *net;
2204         struct in6_addr *addr;
2205 };
2206
2207 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2208 {
2209         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2210         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2211         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2212
2213         if (((void *)rt->dst.dev == dev || !dev) &&
2214             rt != net->ipv6.ip6_null_entry &&
2215             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2216                 /* remove prefsrc entry */
2217                 rt->rt6i_prefsrc.plen = 0;
2218         }
2219         return 0;
2220 }
2221
2222 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2223 {
2224         struct net *net = dev_net(ifp->idev->dev);
2225         struct arg_dev_net_ip adni = {
2226                 .dev = ifp->idev->dev,
2227                 .net = net,
2228                 .addr = &ifp->addr,
2229         };
2230         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2231 }
2232
2233 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2234 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2235
2236 /* Remove routers and update dst entries when gateway turn into host. */
2237 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2238 {
2239         struct in6_addr *gateway = (struct in6_addr *)arg;
2240
2241         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2242              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2243              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2244                 return -1;
2245         }
2246         return 0;
2247 }
2248
2249 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2250 {
2251         fib6_clean_all(net, fib6_clean_tohost, gateway);
2252 }
2253
2254 struct arg_dev_net {
2255         struct net_device *dev;
2256         struct net *net;
2257 };
2258
2259 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2260 {
2261         const struct arg_dev_net *adn = arg;
2262         const struct net_device *dev = adn->dev;
2263
2264         if ((rt->dst.dev == dev || !dev) &&
2265             rt != adn->net->ipv6.ip6_null_entry)
2266                 return -1;
2267
2268         return 0;
2269 }
2270
2271 void rt6_ifdown(struct net *net, struct net_device *dev)
2272 {
2273         struct arg_dev_net adn = {
2274                 .dev = dev,
2275                 .net = net,
2276         };
2277
2278         fib6_clean_all(net, fib6_ifdown, &adn);
2279         icmp6_clean_all(fib6_ifdown, &adn);
2280 }
2281
2282 struct rt6_mtu_change_arg {
2283         struct net_device *dev;
2284         unsigned int mtu;
2285 };
2286
2287 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2288 {
2289         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2290         struct inet6_dev *idev;
2291
2292         /* In IPv6 pmtu discovery is not optional,
2293            so that RTAX_MTU lock cannot disable it.
2294            We still use this lock to block changes
2295            caused by addrconf/ndisc.
2296         */
2297
2298         idev = __in6_dev_get(arg->dev);
2299         if (!idev)
2300                 return 0;
2301
2302         /* For administrative MTU increase, there is no way to discover
2303            IPv6 PMTU increase, so PMTU increase should be updated here.
2304            Since RFC 1981 doesn't include administrative MTU increase
2305            update PMTU increase is a MUST. (i.e. jumbo frame)
2306          */
2307         /*
2308            If new MTU is less than route PMTU, this new MTU will be the
2309            lowest MTU in the path, update the route PMTU to reflect PMTU
2310            decreases; if new MTU is greater than route PMTU, and the
2311            old MTU is the lowest MTU in the path, update the route PMTU
2312            to reflect the increase. In this case if the other nodes' MTU
2313            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2314            PMTU discouvery.
2315          */
2316         if (rt->dst.dev == arg->dev &&
2317             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2318             (dst_mtu(&rt->dst) >= arg->mtu ||
2319              (dst_mtu(&rt->dst) < arg->mtu &&
2320               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2321                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2322         }
2323         return 0;
2324 }
2325
2326 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2327 {
2328         struct rt6_mtu_change_arg arg = {
2329                 .dev = dev,
2330                 .mtu = mtu,
2331         };
2332
2333         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2334 }
2335
2336 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2337         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2338         [RTA_OIF]               = { .type = NLA_U32 },
2339         [RTA_IIF]               = { .type = NLA_U32 },
2340         [RTA_PRIORITY]          = { .type = NLA_U32 },
2341         [RTA_METRICS]           = { .type = NLA_NESTED },
2342         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2343 };
2344
2345 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2346                               struct fib6_config *cfg)
2347 {
2348         struct rtmsg *rtm;
2349         struct nlattr *tb[RTA_MAX+1];
2350         int err;
2351
2352         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2353         if (err < 0)
2354                 goto errout;
2355
2356         err = -EINVAL;
2357         rtm = nlmsg_data(nlh);
2358         memset(cfg, 0, sizeof(*cfg));
2359
2360         cfg->fc_table = rtm->rtm_table;
2361         cfg->fc_dst_len = rtm->rtm_dst_len;
2362         cfg->fc_src_len = rtm->rtm_src_len;
2363         cfg->fc_flags = RTF_UP;
2364         cfg->fc_protocol = rtm->rtm_protocol;
2365         cfg->fc_type = rtm->rtm_type;
2366
2367         if (rtm->rtm_type == RTN_UNREACHABLE ||
2368             rtm->rtm_type == RTN_BLACKHOLE ||
2369             rtm->rtm_type == RTN_PROHIBIT ||
2370             rtm->rtm_type == RTN_THROW)
2371                 cfg->fc_flags |= RTF_REJECT;
2372
2373         if (rtm->rtm_type == RTN_LOCAL)
2374                 cfg->fc_flags |= RTF_LOCAL;
2375
2376         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2377         cfg->fc_nlinfo.nlh = nlh;
2378         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2379
2380         if (tb[RTA_GATEWAY]) {
2381                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2382                 cfg->fc_flags |= RTF_GATEWAY;
2383         }
2384
2385         if (tb[RTA_DST]) {
2386                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2387
2388                 if (nla_len(tb[RTA_DST]) < plen)
2389                         goto errout;
2390
2391                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2392         }
2393
2394         if (tb[RTA_SRC]) {
2395                 int plen = (rtm->rtm_src_len + 7) >> 3;
2396
2397                 if (nla_len(tb[RTA_SRC]) < plen)
2398                         goto errout;
2399
2400                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2401         }
2402
2403         if (tb[RTA_PREFSRC])
2404                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2405
2406         if (tb[RTA_OIF])
2407                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2408
2409         if (tb[RTA_PRIORITY])
2410                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2411
2412         if (tb[RTA_METRICS]) {
2413                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2414                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2415         }
2416
2417         if (tb[RTA_TABLE])
2418                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2419
2420         if (tb[RTA_MULTIPATH]) {
2421                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2422                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2423         }
2424
2425         err = 0;
2426 errout:
2427         return err;
2428 }
2429
2430 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2431 {
2432         struct fib6_config r_cfg;
2433         struct rtnexthop *rtnh;
2434         int remaining;
2435         int attrlen;
2436         int err = 0, last_err = 0;
2437
2438 beginning:
2439         rtnh = (struct rtnexthop *)cfg->fc_mp;
2440         remaining = cfg->fc_mp_len;
2441
2442         /* Parse a Multipath Entry */
2443         while (rtnh_ok(rtnh, remaining)) {
2444                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2445                 if (rtnh->rtnh_ifindex)
2446                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2447
2448                 attrlen = rtnh_attrlen(rtnh);
2449                 if (attrlen > 0) {
2450                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2451
2452                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2453                         if (nla) {
2454                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2455                                 r_cfg.fc_flags |= RTF_GATEWAY;
2456                         }
2457                 }
2458                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2459                 if (err) {
2460                         last_err = err;
2461                         /* If we are trying to remove a route, do not stop the
2462                          * loop when ip6_route_del() fails (because next hop is
2463                          * already gone), we should try to remove all next hops.
2464                          */
2465                         if (add) {
2466                                 /* If add fails, we should try to delete all
2467                                  * next hops that have been already added.
2468                                  */
2469                                 add = 0;
2470                                 goto beginning;
2471                         }
2472                 }
2473                 /* Because each route is added like a single route we remove
2474                  * this flag after the first nexthop (if there is a collision,
2475                  * we have already fail to add the first nexthop:
2476                  * fib6_add_rt2node() has reject it).
2477                  */
2478                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2479                 rtnh = rtnh_next(rtnh, &remaining);
2480         }
2481
2482         return last_err;
2483 }
2484
2485 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2486 {
2487         struct fib6_config cfg;
2488         int err;
2489
2490         err = rtm_to_fib6_config(skb, nlh, &cfg);
2491         if (err < 0)
2492                 return err;
2493
2494         if (cfg.fc_mp)
2495                 return ip6_route_multipath(&cfg, 0);
2496         else
2497                 return ip6_route_del(&cfg);
2498 }
2499
2500 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2501 {
2502         struct fib6_config cfg;
2503         int err;
2504
2505         err = rtm_to_fib6_config(skb, nlh, &cfg);
2506         if (err < 0)
2507                 return err;
2508
2509         if (cfg.fc_mp)
2510                 return ip6_route_multipath(&cfg, 1);
2511         else
2512                 return ip6_route_add(&cfg);
2513 }
2514
2515 static inline size_t rt6_nlmsg_size(void)
2516 {
2517         return NLMSG_ALIGN(sizeof(struct rtmsg))
2518                + nla_total_size(16) /* RTA_SRC */
2519                + nla_total_size(16) /* RTA_DST */
2520                + nla_total_size(16) /* RTA_GATEWAY */
2521                + nla_total_size(16) /* RTA_PREFSRC */
2522                + nla_total_size(4) /* RTA_TABLE */
2523                + nla_total_size(4) /* RTA_IIF */
2524                + nla_total_size(4) /* RTA_OIF */
2525                + nla_total_size(4) /* RTA_PRIORITY */
2526                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2527                + nla_total_size(sizeof(struct rta_cacheinfo));
2528 }
2529
2530 static int rt6_fill_node(struct net *net,
2531                          struct sk_buff *skb, struct rt6_info *rt,
2532                          struct in6_addr *dst, struct in6_addr *src,
2533                          int iif, int type, u32 portid, u32 seq,
2534                          int prefix, int nowait, unsigned int flags)
2535 {
2536         struct rtmsg *rtm;
2537         struct nlmsghdr *nlh;
2538         long expires;
2539         u32 table;
2540
2541         if (prefix) {   /* user wants prefix routes only */
2542                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2543                         /* success since this is not a prefix route */
2544                         return 1;
2545                 }
2546         }
2547
2548         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2549         if (!nlh)
2550                 return -EMSGSIZE;
2551
2552         rtm = nlmsg_data(nlh);
2553         rtm->rtm_family = AF_INET6;
2554         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2555         rtm->rtm_src_len = rt->rt6i_src.plen;
2556         rtm->rtm_tos = 0;
2557         if (rt->rt6i_table)
2558                 table = rt->rt6i_table->tb6_id;
2559         else
2560                 table = RT6_TABLE_UNSPEC;
2561         rtm->rtm_table = table;
2562         if (nla_put_u32(skb, RTA_TABLE, table))
2563                 goto nla_put_failure;
2564         if (rt->rt6i_flags & RTF_REJECT) {
2565                 switch (rt->dst.error) {
2566                 case -EINVAL:
2567                         rtm->rtm_type = RTN_BLACKHOLE;
2568                         break;
2569                 case -EACCES:
2570                         rtm->rtm_type = RTN_PROHIBIT;
2571                         break;
2572                 case -EAGAIN:
2573                         rtm->rtm_type = RTN_THROW;
2574                         break;
2575                 default:
2576                         rtm->rtm_type = RTN_UNREACHABLE;
2577                         break;
2578                 }
2579         }
2580         else if (rt->rt6i_flags & RTF_LOCAL)
2581                 rtm->rtm_type = RTN_LOCAL;
2582         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2583                 rtm->rtm_type = RTN_LOCAL;
2584         else
2585                 rtm->rtm_type = RTN_UNICAST;
2586         rtm->rtm_flags = 0;
2587         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2588         rtm->rtm_protocol = rt->rt6i_protocol;
2589         if (rt->rt6i_flags & RTF_DYNAMIC)
2590                 rtm->rtm_protocol = RTPROT_REDIRECT;
2591         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2592                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2593                         rtm->rtm_protocol = RTPROT_RA;
2594                 else
2595                         rtm->rtm_protocol = RTPROT_KERNEL;
2596         }
2597
2598         if (rt->rt6i_flags & RTF_CACHE)
2599                 rtm->rtm_flags |= RTM_F_CLONED;
2600
2601         if (dst) {
2602                 if (nla_put(skb, RTA_DST, 16, dst))
2603                         goto nla_put_failure;
2604                 rtm->rtm_dst_len = 128;
2605         } else if (rtm->rtm_dst_len)
2606                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2607                         goto nla_put_failure;
2608 #ifdef CONFIG_IPV6_SUBTREES
2609         if (src) {
2610                 if (nla_put(skb, RTA_SRC, 16, src))
2611                         goto nla_put_failure;
2612                 rtm->rtm_src_len = 128;
2613         } else if (rtm->rtm_src_len &&
2614                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2615                 goto nla_put_failure;
2616 #endif
2617         if (iif) {
2618 #ifdef CONFIG_IPV6_MROUTE
2619                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2620                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2621                         if (err <= 0) {
2622                                 if (!nowait) {
2623                                         if (err == 0)
2624                                                 return 0;
2625                                         goto nla_put_failure;
2626                                 } else {
2627                                         if (err == -EMSGSIZE)
2628                                                 goto nla_put_failure;
2629                                 }
2630                         }
2631                 } else
2632 #endif
2633                         if (nla_put_u32(skb, RTA_IIF, iif))
2634                                 goto nla_put_failure;
2635         } else if (dst) {
2636                 struct in6_addr saddr_buf;
2637                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2638                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2639                         goto nla_put_failure;
2640         }
2641
2642         if (rt->rt6i_prefsrc.plen) {
2643                 struct in6_addr saddr_buf;
2644                 saddr_buf = rt->rt6i_prefsrc.addr;
2645                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2646                         goto nla_put_failure;
2647         }
2648
2649         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2650                 goto nla_put_failure;
2651
2652         if (rt->rt6i_flags & RTF_GATEWAY) {
2653                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2654                         goto nla_put_failure;
2655         }
2656
2657         if (rt->dst.dev &&
2658             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2659                 goto nla_put_failure;
2660         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2661                 goto nla_put_failure;
2662
2663         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2664
2665         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2666                 goto nla_put_failure;
2667
2668         return nlmsg_end(skb, nlh);
2669
2670 nla_put_failure:
2671         nlmsg_cancel(skb, nlh);
2672         return -EMSGSIZE;
2673 }
2674
2675 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2676 {
2677         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2678         int prefix;
2679
2680         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2681                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2682                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2683         } else
2684                 prefix = 0;
2685
2686         return rt6_fill_node(arg->net,
2687                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2688                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2689                      prefix, 0, NLM_F_MULTI);
2690 }
2691
2692 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2693 {
2694         struct net *net = sock_net(in_skb->sk);
2695         struct nlattr *tb[RTA_MAX+1];
2696         struct rt6_info *rt;
2697         struct sk_buff *skb;
2698         struct rtmsg *rtm;
2699         struct flowi6 fl6;
2700         int err, iif = 0, oif = 0;
2701
2702         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2703         if (err < 0)
2704                 goto errout;
2705
2706         err = -EINVAL;
2707         memset(&fl6, 0, sizeof(fl6));
2708
2709         if (tb[RTA_SRC]) {
2710                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2711                         goto errout;
2712
2713                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2714         }
2715
2716         if (tb[RTA_DST]) {
2717                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2718                         goto errout;
2719
2720                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2721         }
2722
2723         if (tb[RTA_IIF])
2724                 iif = nla_get_u32(tb[RTA_IIF]);
2725
2726         if (tb[RTA_OIF])
2727                 oif = nla_get_u32(tb[RTA_OIF]);
2728
2729         if (tb[RTA_MARK])
2730                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2731
2732         if (iif) {
2733                 struct net_device *dev;
2734                 int flags = 0;
2735
2736                 dev = __dev_get_by_index(net, iif);
2737                 if (!dev) {
2738                         err = -ENODEV;
2739                         goto errout;
2740                 }
2741
2742                 fl6.flowi6_iif = iif;
2743
2744                 if (!ipv6_addr_any(&fl6.saddr))
2745                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2746
2747                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2748                                                                flags);
2749         } else {
2750                 fl6.flowi6_oif = oif;
2751
2752                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2753         }
2754
2755         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2756         if (!skb) {
2757                 ip6_rt_put(rt);
2758                 err = -ENOBUFS;
2759                 goto errout;
2760         }
2761
2762         /* Reserve room for dummy headers, this skb can pass
2763            through good chunk of routing engine.
2764          */
2765         skb_reset_mac_header(skb);
2766         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2767
2768         skb_dst_set(skb, &rt->dst);
2769
2770         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2771                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2772                             nlh->nlmsg_seq, 0, 0, 0);
2773         if (err < 0) {
2774                 kfree_skb(skb);
2775                 goto errout;
2776         }
2777
2778         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2779 errout:
2780         return err;
2781 }
2782
2783 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2784 {
2785         struct sk_buff *skb;
2786         struct net *net = info->nl_net;
2787         u32 seq;
2788         int err;
2789
2790         err = -ENOBUFS;
2791         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2792
2793         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2794         if (!skb)
2795                 goto errout;
2796
2797         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2798                                 event, info->portid, seq, 0, 0, 0);
2799         if (err < 0) {
2800                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2801                 WARN_ON(err == -EMSGSIZE);
2802                 kfree_skb(skb);
2803                 goto errout;
2804         }
2805         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2806                     info->nlh, gfp_any());
2807         return;
2808 errout:
2809         if (err < 0)
2810                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2811 }
2812
2813 static int ip6_route_dev_notify(struct notifier_block *this,
2814                                 unsigned long event, void *ptr)
2815 {
2816         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2817         struct net *net = dev_net(dev);
2818
2819         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2820                 net->ipv6.ip6_null_entry->dst.dev = dev;
2821                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2822 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2823                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2824                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2825                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2826                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2827 #endif
2828         }
2829
2830         return NOTIFY_OK;
2831 }
2832
2833 /*
2834  *      /proc
2835  */
2836
2837 #ifdef CONFIG_PROC_FS
2838
2839 static const struct file_operations ipv6_route_proc_fops = {
2840         .owner          = THIS_MODULE,
2841         .open           = ipv6_route_open,
2842         .read           = seq_read,
2843         .llseek         = seq_lseek,
2844         .release        = seq_release_net,
2845 };
2846
2847 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2848 {
2849         struct net *net = (struct net *)seq->private;
2850         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2851                    net->ipv6.rt6_stats->fib_nodes,
2852                    net->ipv6.rt6_stats->fib_route_nodes,
2853                    net->ipv6.rt6_stats->fib_rt_alloc,
2854                    net->ipv6.rt6_stats->fib_rt_entries,
2855                    net->ipv6.rt6_stats->fib_rt_cache,
2856                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2857                    net->ipv6.rt6_stats->fib_discarded_routes);
2858
2859         return 0;
2860 }
2861
2862 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2863 {
2864         return single_open_net(inode, file, rt6_stats_seq_show);
2865 }
2866
2867 static const struct file_operations rt6_stats_seq_fops = {
2868         .owner   = THIS_MODULE,
2869         .open    = rt6_stats_seq_open,
2870         .read    = seq_read,
2871         .llseek  = seq_lseek,
2872         .release = single_release_net,
2873 };
2874 #endif  /* CONFIG_PROC_FS */
2875
2876 #ifdef CONFIG_SYSCTL
2877
2878 static
2879 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2880                               void __user *buffer, size_t *lenp, loff_t *ppos)
2881 {
2882         struct net *net;
2883         int delay;
2884         if (!write)
2885                 return -EINVAL;
2886
2887         net = (struct net *)ctl->extra1;
2888         delay = net->ipv6.sysctl.flush_delay;
2889         proc_dointvec(ctl, write, buffer, lenp, ppos);
2890         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2891         return 0;
2892 }
2893
2894 struct ctl_table ipv6_route_table_template[] = {
2895         {
2896                 .procname       =       "flush",
2897                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2898                 .maxlen         =       sizeof(int),
2899                 .mode           =       0200,
2900                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2901         },
2902         {
2903                 .procname       =       "gc_thresh",
2904                 .data           =       &ip6_dst_ops_template.gc_thresh,
2905                 .maxlen         =       sizeof(int),
2906                 .mode           =       0644,
2907                 .proc_handler   =       proc_dointvec,
2908         },
2909         {
2910                 .procname       =       "max_size",
2911                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2912                 .maxlen         =       sizeof(int),
2913                 .mode           =       0644,
2914                 .proc_handler   =       proc_dointvec,
2915         },
2916         {
2917                 .procname       =       "gc_min_interval",
2918                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2919                 .maxlen         =       sizeof(int),
2920                 .mode           =       0644,
2921                 .proc_handler   =       proc_dointvec_jiffies,
2922         },
2923         {
2924                 .procname       =       "gc_timeout",
2925                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2926                 .maxlen         =       sizeof(int),
2927                 .mode           =       0644,
2928                 .proc_handler   =       proc_dointvec_jiffies,
2929         },
2930         {
2931                 .procname       =       "gc_interval",
2932                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2933                 .maxlen         =       sizeof(int),
2934                 .mode           =       0644,
2935                 .proc_handler   =       proc_dointvec_jiffies,
2936         },
2937         {
2938                 .procname       =       "gc_elasticity",
2939                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2940                 .maxlen         =       sizeof(int),
2941                 .mode           =       0644,
2942                 .proc_handler   =       proc_dointvec,
2943         },
2944         {
2945                 .procname       =       "mtu_expires",
2946                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2947                 .maxlen         =       sizeof(int),
2948                 .mode           =       0644,
2949                 .proc_handler   =       proc_dointvec_jiffies,
2950         },
2951         {
2952                 .procname       =       "min_adv_mss",
2953                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2954                 .maxlen         =       sizeof(int),
2955                 .mode           =       0644,
2956                 .proc_handler   =       proc_dointvec,
2957         },
2958         {
2959                 .procname       =       "gc_min_interval_ms",
2960                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2961                 .maxlen         =       sizeof(int),
2962                 .mode           =       0644,
2963                 .proc_handler   =       proc_dointvec_ms_jiffies,
2964         },
2965         { }
2966 };
2967
2968 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2969 {
2970         struct ctl_table *table;
2971
2972         table = kmemdup(ipv6_route_table_template,
2973                         sizeof(ipv6_route_table_template),
2974                         GFP_KERNEL);
2975
2976         if (table) {
2977                 table[0].data = &net->ipv6.sysctl.flush_delay;
2978                 table[0].extra1 = net;
2979                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2980                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2981                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2982                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2983                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2984                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2985                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2986                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2987                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2988
2989                 /* Don't export sysctls to unprivileged users */
2990                 if (net->user_ns != &init_user_ns)
2991                         table[0].procname = NULL;
2992         }
2993
2994         return table;
2995 }
2996 #endif
2997
2998 static int __net_init ip6_route_net_init(struct net *net)
2999 {
3000         int ret = -ENOMEM;
3001
3002         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3003                sizeof(net->ipv6.ip6_dst_ops));
3004
3005         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3006                 goto out_ip6_dst_ops;
3007
3008         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3009                                            sizeof(*net->ipv6.ip6_null_entry),
3010                                            GFP_KERNEL);
3011         if (!net->ipv6.ip6_null_entry)
3012                 goto out_ip6_dst_entries;
3013         net->ipv6.ip6_null_entry->dst.path =
3014                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3015         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3016         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3017                          ip6_template_metrics, true);
3018
3019 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3020         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3021                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3022                                                GFP_KERNEL);
3023         if (!net->ipv6.ip6_prohibit_entry)
3024                 goto out_ip6_null_entry;
3025         net->ipv6.ip6_prohibit_entry->dst.path =
3026                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3027         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3028         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3029                          ip6_template_metrics, true);
3030
3031         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3032                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3033                                                GFP_KERNEL);
3034         if (!net->ipv6.ip6_blk_hole_entry)
3035                 goto out_ip6_prohibit_entry;
3036         net->ipv6.ip6_blk_hole_entry->dst.path =
3037                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3038         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3039         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3040                          ip6_template_metrics, true);
3041 #endif
3042
3043         net->ipv6.sysctl.flush_delay = 0;
3044         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3045         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3046         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3047         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3048         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3049         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3050         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3051
3052         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3053
3054         ret = 0;
3055 out:
3056         return ret;
3057
3058 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3059 out_ip6_prohibit_entry:
3060         kfree(net->ipv6.ip6_prohibit_entry);
3061 out_ip6_null_entry:
3062         kfree(net->ipv6.ip6_null_entry);
3063 #endif
3064 out_ip6_dst_entries:
3065         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3066 out_ip6_dst_ops:
3067         goto out;
3068 }
3069
3070 static void __net_exit ip6_route_net_exit(struct net *net)
3071 {
3072         kfree(net->ipv6.ip6_null_entry);
3073 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3074         kfree(net->ipv6.ip6_prohibit_entry);
3075         kfree(net->ipv6.ip6_blk_hole_entry);
3076 #endif
3077         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3078 }
3079
3080 static int __net_init ip6_route_net_init_late(struct net *net)
3081 {
3082 #ifdef CONFIG_PROC_FS
3083         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3084         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3085 #endif
3086         return 0;
3087 }
3088
3089 static void __net_exit ip6_route_net_exit_late(struct net *net)
3090 {
3091 #ifdef CONFIG_PROC_FS
3092         remove_proc_entry("ipv6_route", net->proc_net);
3093         remove_proc_entry("rt6_stats", net->proc_net);
3094 #endif
3095 }
3096
3097 static struct pernet_operations ip6_route_net_ops = {
3098         .init = ip6_route_net_init,
3099         .exit = ip6_route_net_exit,
3100 };
3101
3102 static int __net_init ipv6_inetpeer_init(struct net *net)
3103 {
3104         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3105
3106         if (!bp)
3107                 return -ENOMEM;
3108         inet_peer_base_init(bp);
3109         net->ipv6.peers = bp;
3110         return 0;
3111 }
3112
3113 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3114 {
3115         struct inet_peer_base *bp = net->ipv6.peers;
3116
3117         net->ipv6.peers = NULL;
3118         inetpeer_invalidate_tree(bp);
3119         kfree(bp);
3120 }
3121
3122 static struct pernet_operations ipv6_inetpeer_ops = {
3123         .init   =       ipv6_inetpeer_init,
3124         .exit   =       ipv6_inetpeer_exit,
3125 };
3126
3127 static struct pernet_operations ip6_route_net_late_ops = {
3128         .init = ip6_route_net_init_late,
3129         .exit = ip6_route_net_exit_late,
3130 };
3131
3132 static struct notifier_block ip6_route_dev_notifier = {
3133         .notifier_call = ip6_route_dev_notify,
3134         .priority = 0,
3135 };
3136
3137 int __init ip6_route_init(void)
3138 {
3139         int ret;
3140
3141         ret = -ENOMEM;
3142         ip6_dst_ops_template.kmem_cachep =
3143                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3144                                   SLAB_HWCACHE_ALIGN, NULL);
3145         if (!ip6_dst_ops_template.kmem_cachep)
3146                 goto out;
3147
3148         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3149         if (ret)
3150                 goto out_kmem_cache;
3151
3152         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3153         if (ret)
3154                 goto out_dst_entries;
3155
3156         ret = register_pernet_subsys(&ip6_route_net_ops);
3157         if (ret)
3158                 goto out_register_inetpeer;
3159
3160         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3161
3162         /* Registering of the loopback is done before this portion of code,
3163          * the loopback reference in rt6_info will not be taken, do it
3164          * manually for init_net */
3165         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3166         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3167   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3168         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3169         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3170         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3171         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3172   #endif
3173         ret = fib6_init();
3174         if (ret)
3175                 goto out_register_subsys;
3176
3177         ret = xfrm6_init();
3178         if (ret)
3179                 goto out_fib6_init;
3180
3181         ret = fib6_rules_init();
3182         if (ret)
3183                 goto xfrm6_init;
3184
3185         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3186         if (ret)
3187                 goto fib6_rules_init;
3188
3189         ret = -ENOBUFS;
3190         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3191             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3192             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3193                 goto out_register_late_subsys;
3194
3195         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3196         if (ret)
3197                 goto out_register_late_subsys;
3198
3199 out:
3200         return ret;
3201
3202 out_register_late_subsys:
3203         unregister_pernet_subsys(&ip6_route_net_late_ops);
3204 fib6_rules_init:
3205         fib6_rules_cleanup();
3206 xfrm6_init:
3207         xfrm6_fini();
3208 out_fib6_init:
3209         fib6_gc_cleanup();
3210 out_register_subsys:
3211         unregister_pernet_subsys(&ip6_route_net_ops);
3212 out_register_inetpeer:
3213         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3214 out_dst_entries:
3215         dst_entries_destroy(&ip6_dst_blackhole_ops);
3216 out_kmem_cache:
3217         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3218         goto out;
3219 }
3220
/*
 * Tear down the IPv6 routing subsystem: unregister the netdevice notifier
 * and the pernet subsystems, clean up the fib6 rules, xfrm6 and fib6 gc
 * state, then destroy the blackhole dst counter and the dst slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops here, whereas the error path of ip6_route_init()
 * unregisters them in the opposite order -- confirm this is intended.
 */
void ip6_route_cleanup(void)
{
        unregister_netdevice_notifier(&ip6_route_dev_notifier);
        unregister_pernet_subsys(&ip6_route_net_late_ops);
        fib6_rules_cleanup();
        xfrm6_fini();
        fib6_gc_cleanup();
        unregister_pernet_subsys(&ipv6_inetpeer_ops);
        unregister_pernet_subsys(&ip6_route_net_ops);
        dst_entries_destroy(&ip6_dst_blackhole_ops);
        /* Frees the slab cache created in ip6_route_init(). */
        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}