Merge branch 'parisc-4.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/deller...
[cascardo/linux.git] / drivers / net / vrf.c
index 1ce7420..85c271c 100644 (file)
@@ -37,9 +37,6 @@
 #include <net/l3mdev.h>
 #include <net/fib_rules.h>
 
-#define RT_FL_TOS(oldflp4) \
-       ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-
 #define DRV_NAME       "vrf"
 #define DRV_VERSION    "1.0"
 
@@ -137,6 +134,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
+static int vrf_ip6_local_out(struct net *net, struct sock *sk,
+                            struct sk_buff *skb)
+{
+       int err;
+
+       err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net,
+                     sk, skb, NULL, skb_dst(skb)->dev, dst_output);
+
+       if (likely(err == 1))
+               err = dst_output(net, sk, skb);
+
+       return err;
+}
+
 static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
                                           struct net_device *dev)
 {
@@ -151,7 +162,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
                .flowlabel = ip6_flowinfo(iph),
                .flowi6_mark = skb->mark,
                .flowi6_proto = iph->nexthdr,
-               .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
+               .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF,
        };
        int ret = NET_XMIT_DROP;
        struct dst_entry *dst;
@@ -207,7 +218,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
        /* strip the ethernet header added for pass through VRF device */
        __skb_pull(skb, skb_network_offset(skb));
 
-       ret = ip6_local_out(net, skb->sk, skb);
+       ret = vrf_ip6_local_out(net, skb->sk, skb);
        if (unlikely(net_xmit_eval(ret)))
                dev->stats.tx_errors++;
        else
@@ -227,6 +238,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 }
 #endif
 
+/* based on ip_local_out; can't use it because the dst now points to us */
+static int vrf_ip_local_out(struct net *net, struct sock *sk,
+                           struct sk_buff *skb)
+{
+       int err;
+
+       err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+                     skb, NULL, skb_dst(skb)->dev, dst_output);
+       if (likely(err == 1))
+               err = dst_output(net, sk, skb);
+
+       return err;
+}
+
 static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                                           struct net_device *vrf_dev)
 {
@@ -237,8 +262,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                .flowi4_oif = vrf_dev->ifindex,
                .flowi4_iif = LOOPBACK_IFINDEX,
                .flowi4_tos = RT_TOS(ip4h->tos),
-               .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC |
-                               FLOWI_FLAG_SKIP_NH_OIF,
+               .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
                .daddr = ip4h->daddr,
        };
        struct net *net = dev_net(vrf_dev);
@@ -292,7 +316,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                                               RT_SCOPE_LINK);
        }
 
-       ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+       ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
        if (unlikely(net_xmit_eval(ret)))
                vrf_dev->stats.tx_errors++;
        else
@@ -377,6 +401,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+/* set dst on skb to send packet to us via dev_xmit path. Allows
+ * packet to go through device based features such as qdisc, netfilter
+ * hooks and packet sockets with skb->dev set to vrf device.
+ */
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+                                  struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       struct net_vrf *vrf = netdev_priv(vrf_dev);
+       struct dst_entry *dst = NULL;
+       struct rt6_info *rt6;
+
+       /* don't divert link scope packets */
+       if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
+               return skb;
+
+       rcu_read_lock();
+
+       rt6 = rcu_dereference(vrf->rt6);
+       if (likely(rt6)) {
+               dst = &rt6->dst;
+               dst_hold(dst);
+       }
+
+       rcu_read_unlock();
+
+       if (unlikely(!dst)) {
+               vrf_tx_error(vrf_dev, skb);
+               return NULL;
+       }
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+
+       return skb;
+}
+
 /* holding rtnl */
 static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 {
@@ -463,6 +524,13 @@ out:
        return rc;
 }
 #else
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+                                  struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       return skb;
+}
+
 static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 {
 }
@@ -531,6 +599,55 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
+/* set dst on skb to send packet to us via dev_xmit path. Allows
+ * packet to go through device based features such as qdisc, netfilter
+ * hooks and packet sockets with skb->dev set to vrf device.
+ */
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+                                 struct sock *sk,
+                                 struct sk_buff *skb)
+{
+       struct net_vrf *vrf = netdev_priv(vrf_dev);
+       struct dst_entry *dst = NULL;
+       struct rtable *rth;
+
+       rcu_read_lock();
+
+       rth = rcu_dereference(vrf->rth);
+       if (likely(rth)) {
+               dst = &rth->dst;
+               dst_hold(dst);
+       }
+
+       rcu_read_unlock();
+
+       if (unlikely(!dst)) {
+               vrf_tx_error(vrf_dev, skb);
+               return NULL;
+       }
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+
+       return skb;
+}
+
+/* called with rcu lock held */
+static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
+                                 struct sock *sk,
+                                 struct sk_buff *skb,
+                                 u16 proto)
+{
+       switch (proto) {
+       case AF_INET:
+               return vrf_ip_out(vrf_dev, sk, skb);
+       case AF_INET6:
+               return vrf_ip6_out(vrf_dev, sk, skb);
+       }
+
+       return skb;
+}
+
 /* holding rtnl */
 static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
 {
@@ -722,63 +839,6 @@ static u32 vrf_fib_table(const struct net_device *dev)
        return vrf->tb_id;
 }
 
-static struct rtable *vrf_get_rtable(const struct net_device *dev,
-                                    const struct flowi4 *fl4)
-{
-       struct rtable *rth = NULL;
-
-       if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) {
-               struct net_vrf *vrf = netdev_priv(dev);
-
-               rcu_read_lock();
-
-               rth = rcu_dereference(vrf->rth);
-               if (likely(rth))
-                       dst_hold(&rth->dst);
-
-               rcu_read_unlock();
-       }
-
-       return rth;
-}
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
-{
-       struct fib_result res = { .tclassid = 0 };
-       struct net *net = dev_net(dev);
-       u32 orig_tos = fl4->flowi4_tos;
-       u8 flags = fl4->flowi4_flags;
-       u8 scope = fl4->flowi4_scope;
-       u8 tos = RT_FL_TOS(fl4);
-       int rc;
-
-       if (unlikely(!fl4->daddr))
-               return 0;
-
-       fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-       fl4->flowi4_iif = LOOPBACK_IFINDEX;
-       /* make sure oif is set to VRF device for lookup */
-       fl4->flowi4_oif = dev->ifindex;
-       fl4->flowi4_tos = tos & IPTOS_RT_MASK;
-       fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
-                            RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
-
-       rc = fib_lookup(net, fl4, &res, 0);
-       if (!rc) {
-               if (res.type == RTN_LOCAL)
-                       fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
-               else
-                       fib_select_path(net, &res, fl4, -1);
-       }
-
-       fl4->flowi4_flags = flags;
-       fl4->flowi4_tos = orig_tos;
-       fl4->flowi4_scope = scope;
-
-       return rc;
-}
-
 static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        return 0;
@@ -970,106 +1030,44 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
-                                        struct flowi6 *fl6)
+/* send to link-local or multicast address via interface enslaved to
+ * VRF device. Force lookup to VRF table without changing flow struct
+ */
+static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
+                                             struct flowi6 *fl6)
 {
-       bool need_strict = rt6_need_strict(&fl6->daddr);
-       struct net_vrf *vrf = netdev_priv(dev);
        struct net *net = dev_net(dev);
+       int flags = RT6_LOOKUP_F_IFACE;
        struct dst_entry *dst = NULL;
        struct rt6_info *rt;
 
-       /* send to link-local or multicast address */
-       if (need_strict) {
-               int flags = RT6_LOOKUP_F_IFACE;
-
-               /* VRF device does not have a link-local address and
-                * sending packets to link-local or mcast addresses over
-                * a VRF device does not make sense
-                */
-               if (fl6->flowi6_oif == dev->ifindex) {
-                       struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst;
-
-                       dst_hold(dst);
-                       return dst;
-               }
-
-               if (!ipv6_addr_any(&fl6->saddr))
-                       flags |= RT6_LOOKUP_F_HAS_SADDR;
-
-               rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
-               if (rt)
-                       dst = &rt->dst;
-
-       } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
-
-               rcu_read_lock();
-
-               rt = rcu_dereference(vrf->rt6);
-               if (likely(rt)) {
-                       dst = &rt->dst;
-                       dst_hold(dst);
-               }
-
-               rcu_read_unlock();
+       /* VRF device does not have a link-local address and
+        * sending packets to link-local or mcast addresses over
+        * a VRF device does not make sense
+        */
+       if (fl6->flowi6_oif == dev->ifindex) {
+               dst = &net->ipv6.ip6_null_entry->dst;
+               dst_hold(dst);
+               return dst;
        }
 
-       /* make sure oif is set to VRF device for lookup */
-       if (!need_strict)
-               fl6->flowi6_oif = dev->ifindex;
-
-       return dst;
-}
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
-                         struct flowi6 *fl6)
-{
-       struct net *net = dev_net(dev);
-       struct dst_entry *dst;
-       struct rt6_info *rt;
-       int err;
-
-       if (rt6_need_strict(&fl6->daddr)) {
-               rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif,
-                                         RT6_LOOKUP_F_IFACE);
-               if (unlikely(!rt))
-                       return 0;
+       if (!ipv6_addr_any(&fl6->saddr))
+               flags |= RT6_LOOKUP_F_HAS_SADDR;
 
+       rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+       if (rt)
                dst = &rt->dst;
-       } else {
-               __u8 flags = fl6->flowi6_flags;
 
-               fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
-               fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-
-               dst = ip6_route_output(net, sk, fl6);
-               rt = (struct rt6_info *)dst;
-
-               fl6->flowi6_flags = flags;
-       }
-
-       err = dst->error;
-       if (!err) {
-               err = ip6_route_get_saddr(net, rt, &fl6->daddr,
-                                         sk ? inet6_sk(sk)->srcprefs : 0,
-                                         &fl6->saddr);
-       }
-
-       dst_release(dst);
-
-       return err;
+       return dst;
 }
 #endif
 
 static const struct l3mdev_ops vrf_l3mdev_ops = {
        .l3mdev_fib_table       = vrf_fib_table,
-       .l3mdev_get_rtable      = vrf_get_rtable,
-       .l3mdev_get_saddr       = vrf_get_saddr,
        .l3mdev_l3_rcv          = vrf_l3_rcv,
+       .l3mdev_l3_out          = vrf_l3_out,
 #if IS_ENABLED(CONFIG_IPV6)
-       .l3mdev_get_rt6_dst     = vrf_get_rt6_dst,
-       .l3mdev_get_saddr6      = vrf_get_saddr6,
+       .l3mdev_link_scope_lookup = vrf_link_scope_lookup,
 #endif
 };