net: ipv6: Do not consider link state for nexthop validation
[cascardo/linux.git] / net / ipv6 / route.c
index e3a224b..947ed1d 100644 (file)
@@ -102,11 +102,13 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_add_route_info(struct net *net,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex,
+                                          const struct in6_addr *gwaddr,
+                                          struct net_device *dev,
                                           unsigned int pref);
 static struct rt6_info *rt6_get_route_info(struct net *net,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex);
+                                          const struct in6_addr *gwaddr,
+                                          struct net_device *dev);
 #endif
 
 struct uncached_list {
@@ -656,7 +658,8 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
        struct net_device *dev = rt->dst.dev;
 
        if (dev && !netif_carrier_ok(dev) &&
-           idev->cnf.ignore_routes_with_linkdown)
+           idev->cnf.ignore_routes_with_linkdown &&
+           !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
                goto out;
 
        if (rt6_check_expired(rt))
@@ -803,7 +806,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                rt = rt6_get_dflt_router(gwaddr, dev);
        else
                rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
-                                       gwaddr, dev->ifindex);
+                                       gwaddr, dev);
 
        if (rt && !lifetime) {
                ip6_del_rt(rt);
@@ -811,8 +814,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
        }
 
        if (!rt && lifetime)
-               rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
-                                       pref);
+               rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
+                                       dev, pref);
        else if (rt)
                rt->rt6i_flags = RTF_ROUTEINFO |
                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
@@ -1050,6 +1053,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
        int strict = 0;
 
        strict |= flags & RT6_LOOKUP_F_IFACE;
+       strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
        if (net->ipv6.devconf_all->forwarding == 0)
                strict |= RT6_LOOKUP_F_REACHABLE;
 
@@ -1147,15 +1151,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *
        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 }
 
-static struct dst_entry *ip6_route_input_lookup(struct net *net,
-                                               struct net_device *dev,
-                                               struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_input_lookup(struct net *net,
+                                        struct net_device *dev,
+                                        struct flowi6 *fl6, int flags)
 {
        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
                flags |= RT6_LOOKUP_F_IFACE;
 
        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 }
+EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
 void ip6_route_input(struct sk_buff *skb)
 {
@@ -1164,7 +1169,7 @@ void ip6_route_input(struct sk_buff *skb)
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct ip_tunnel_info *tun_info;
        struct flowi6 fl6 = {
-               .flowi6_iif = l3mdev_fib_oif(skb->dev),
+               .flowi6_iif = skb->dev->ifindex,
                .daddr = iph->daddr,
                .saddr = iph->saddr,
                .flowlabel = ip6_flowinfo(iph),
@@ -1188,12 +1193,15 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
                                         struct flowi6 *fl6, int flags)
 {
-       struct dst_entry *dst;
        bool any_src;
 
-       dst = l3mdev_get_rt6_dst(net, fl6);
-       if (dst)
-               return dst;
+       if (rt6_need_strict(&fl6->daddr)) {
+               struct dst_entry *dst;
+
+               dst = l3mdev_link_scope_lookup(net, fl6);
+               if (dst)
+                       return dst;
+       }
 
        fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
@@ -1604,7 +1612,9 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
        rcu_read_unlock();
 
 out:
-       return min_t(unsigned int, mtu, IP6_MAX_MTU);
+       mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+       return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
 static struct dst_entry *icmp6_dst_gc_list;
@@ -1783,7 +1793,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
        };
        struct fib6_table *table;
        struct rt6_info *rt;
-       int flags = RT6_LOOKUP_F_IFACE;
+       int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
 
        table = fib6_get_table(net, cfg->fc_table);
        if (!table)
@@ -2319,13 +2329,16 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_get_route_info(struct net *net,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex)
+                                          const struct in6_addr *gwaddr,
+                                          struct net_device *dev)
 {
+       u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
+       int ifindex = dev->ifindex;
        struct fib6_node *fn;
        struct rt6_info *rt = NULL;
        struct fib6_table *table;
 
-       table = fib6_get_table(net, RT6_TABLE_INFO);
+       table = fib6_get_table(net, tb_id);
        if (!table)
                return NULL;
 
@@ -2351,12 +2364,13 @@ out:
 
 static struct rt6_info *rt6_add_route_info(struct net *net,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex,
+                                          const struct in6_addr *gwaddr,
+                                          struct net_device *dev,
                                           unsigned int pref)
 {
        struct fib6_config cfg = {
                .fc_metric      = IP6_RT_PRIO_USER,
-               .fc_ifindex     = ifindex,
+               .fc_ifindex     = dev->ifindex,
                .fc_dst_len     = prefixlen,
                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
                                  RTF_UP | RTF_PREF(pref),
@@ -2365,7 +2379,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
                .fc_nlinfo.nl_net = net,
        };
 
-       cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
+       cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
        cfg.fc_dst = *prefix;
        cfg.fc_gateway = *gwaddr;
 
@@ -2375,16 +2389,17 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 
        ip6_route_add(&cfg);
 
-       return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
+       return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
 }
 #endif
 
 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
 {
+       u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
        struct rt6_info *rt;
        struct fib6_table *table;
 
-       table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
+       table = fib6_get_table(dev_net(dev), tb_id);
        if (!table)
                return NULL;
 
@@ -2418,20 +2433,20 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 
        cfg.fc_gateway = *gwaddr;
 
-       ip6_route_add(&cfg);
+       if (!ip6_route_add(&cfg)) {
+               struct fib6_table *table;
+
+               table = fib6_get_table(dev_net(dev), cfg.fc_table);
+               if (table)
+                       table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
+       }
 
        return rt6_get_dflt_router(gwaddr, dev);
 }
 
-void rt6_purge_dflt_routers(struct net *net)
+static void __rt6_purge_dflt_routers(struct fib6_table *table)
 {
        struct rt6_info *rt;
-       struct fib6_table *table;
-
-       /* NOTE: Keep consistent with rt6_get_dflt_router */
-       table = fib6_get_table(net, RT6_TABLE_DFLT);
-       if (!table)
-               return;
 
 restart:
        read_lock_bh(&table->tb6_lock);
@@ -2445,6 +2460,27 @@ restart:
                }
        }
        read_unlock_bh(&table->tb6_lock);
+
+       table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
+}
+
+void rt6_purge_dflt_routers(struct net *net)
+{
+       struct fib6_table *table;
+       struct hlist_head *head;
+       unsigned int h;
+
+       rcu_read_lock();
+
+       for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+               head = &net->ipv6.fib_table_hash[h];
+               hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+                       if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
+                               __rt6_purge_dflt_routers(table);
+               }
+       }
+
+       rcu_read_unlock();
 }
 
 static void rtmsg_to_fib6_config(struct net *net,
@@ -2565,8 +2601,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 {
        u32 tb_id;
        struct net *net = dev_net(idev->dev);
-       struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
-                                           DST_NOCOUNT);
+       struct net_device *dev = net->loopback_dev;
+       struct rt6_info *rt;
+
+       /* use L3 Master device as loopback for host routes if device
+        * is enslaved and address is not link local or multicast
+        */
+       if (!rt6_need_strict(addr))
+               dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
+
+       rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
        if (!rt)
                return ERR_PTR(-ENOMEM);
 
@@ -3202,7 +3246,9 @@ static int rt6_fill_node(struct net *net,
        if (iif) {
 #ifdef CONFIG_IPV6_MROUTE
                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
-                       int err = ip6mr_get_route(net, skb, rtm, nowait);
+                       int err = ip6mr_get_route(net, skb, rtm, nowait,
+                                                 portid);
+
                        if (err <= 0) {
                                if (!nowait) {
                                        if (err == 0)
@@ -3345,11 +3391,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        } else {
                fl6.flowi6_oif = oif;
 
-               if (netif_index_is_l3_master(net, oif)) {
-                       fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
-                                          FLOWI_FLAG_SKIP_NH_OIF;
-               }
-
                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
        }