net: Add source address lookup op for VRF
author David Ahern <dsa@cumulusnetworks.com>
Mon, 5 Oct 2015 15:51:26 +0000 (08:51 -0700)
committer David S. Miller <davem@davemloft.net>
Wed, 7 Oct 2015 11:27:44 +0000 (04:27 -0700)
Add an operation to l3mdev to look up the source address for a given flow.
Add support for the operation to the VRF driver and convert the existing
IPv4 hooks to use the new lookup.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/vrf.c
include/net/l3mdev.h
include/net/route.h
net/ipv4/udp.c

index 8713317..6449976 100644 (file)
@@ -36,6 +36,9 @@
 #include <net/addrconf.h>
 #include <net/l3mdev.h>
 
+#define RT_FL_TOS(oldflp4) \
+       ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) /* tos limited to IPTOS_RT_MASK bits plus RTO_ONLINK */
+
 #define DRV_NAME       "vrf"
 #define DRV_VERSION    "1.0"
 
@@ -553,9 +556,41 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,
        return rth;
 }
 
+/* Look up fl4's source address via the FIB; called under rcu_read_lock */
+static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
+{
+       struct fib_result res = { .tclassid = 0 };
+       struct net *net = dev_net(dev);
+       u32 orig_tos = fl4->flowi4_tos;
+       u8 flags = fl4->flowi4_flags;
+       u8 scope = fl4->flowi4_scope;
+       u8 tos = RT_FL_TOS(fl4);
+       /* No destination address: return with fl4 unmodified. */
+       if (unlikely(!fl4->daddr))
+               return;
+       /* Temporarily adjust flags/tos/scope for the lookup (restored below). */
+       fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+       fl4->flowi4_iif = LOOPBACK_IFINDEX; /* NOTE(review): not restored on exit */
+       fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+       fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
+                            RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+       /* Local route: use the route's prefsrc, falling back to daddr. */
+       if (!fib_lookup(net, fl4, &res, 0)) {
+               if (res.type == RTN_LOCAL)
+                       fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
+               else
+                       fib_select_path(net, &res, fl4, -1); /* presumably sets fl4->saddr; confirm */
+       }
+       /* Put back the caller's flags, tos and scope. */
+       fl4->flowi4_flags = flags;
+       fl4->flowi4_tos = orig_tos;
+       fl4->flowi4_scope = scope;
+}
+
 static const struct l3mdev_ops vrf_l3mdev_ops = {
        .l3mdev_fib_table       = vrf_fib_table,
        .l3mdev_get_rtable      = vrf_get_rtable,
+       .l3mdev_get_saddr       = vrf_get_saddr, /* source address lookup for a flowi4 */
 };
 
 static void vrf_get_drvinfo(struct net_device *dev,
index 87cee05..44a19a1 100644 (file)
  * @l3mdev_fib_table: Get FIB table id to use for lookups
  *
  * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
+ *
+ * @l3mdev_get_saddr: Get source address for a flow
  */
 
 struct l3mdev_ops {
        u32             (*l3mdev_fib_table)(const struct net_device *dev);
        struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
                                             const struct flowi4 *fl4);
+       void            (*l3mdev_get_saddr)(struct net_device *dev,
+                                           struct flowi4 *fl4); /* fl4 is non-const: the op may write to it */
 };
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
@@ -100,6 +104,25 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
        return rc;
 }
 
+static inline void l3mdev_get_saddr(struct net *net, int ifindex,
+                                   struct flowi4 *fl4)
+{
+       struct net_device *dev;
+       /* Run the L3 master's l3mdev_get_saddr op for ifindex, if any. */
+       if (ifindex) { /* ifindex 0: nothing to do */
+               /* dev is only dereferenced inside this RCU read section. */
+               rcu_read_lock();
+               /* Skip unknown devices, non-masters and masters without the op. */
+               dev = dev_get_by_index_rcu(net, ifindex);
+               if (dev && netif_is_l3_master(dev) &&
+                   dev->l3mdev_ops->l3mdev_get_saddr) {
+                       dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
+               }
+
+               rcu_read_unlock();
+       }
+}
+
 #else
 
 static inline int l3mdev_master_ifindex_rcu(struct net_device *dev)
@@ -144,6 +167,10 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
        return false;
 }
 
+static inline void l3mdev_get_saddr(struct net *net, int ifindex,
+                                   struct flowi4 *fl4)
+{ /* no-op stub used when CONFIG_NET_L3_MASTER_DEV is not defined */
+}
 #endif
 
 #endif /* _NET_L3MDEV_H_ */
index 3e18d90..ee81307 100644 (file)
@@ -266,9 +266,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
        if (inet_sk(sk)->transparent)
                flow_flags |= FLOWI_FLAG_ANYSRC;
 
-       if (netif_index_is_l3_master(sock_net(sk), oif))
-               flow_flags |= FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
-
        flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
                           protocol, flow_flags, dst, src, dport, sport);
 }
@@ -285,6 +282,10 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
        ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
                              sport, dport, sk);
 
+       if (!src && oif) {
+               l3mdev_get_saddr(net, oif, fl4);
+               src = fl4->saddr;
+       }
        if (!dst || !src) {
                rt = __ip_route_output_key(net, fl4);
                if (IS_ERR(rt))
index b2882cf..e1fc129 100644 (file)
@@ -1017,30 +1017,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
                fl4 = &fl4_stack;
 
-               /* unconnected socket. If output device is enslaved to a VRF
-                * device lookup source address from VRF table. This mimics
-                * behavior of ip_route_connect{_init}.
-                */
-               if (netif_index_is_l3_master(net, ipc.oif)) {
-                       flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
-                                          RT_SCOPE_UNIVERSE, sk->sk_protocol,
-                                          (flow_flags | FLOWI_FLAG_L3MDEV_SRC |
-                                           FLOWI_FLAG_SKIP_NH_OIF),
-                                          faddr, saddr, dport,
-                                          inet->inet_sport);
-
-                       rt = ip_route_output_flow(net, fl4, sk);
-                       if (!IS_ERR(rt)) {
-                               saddr = fl4->saddr;
-                               ip_rt_put(rt);
-                       }
-               }
-
                flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
                                   RT_SCOPE_UNIVERSE, sk->sk_protocol,
                                   flow_flags,
                                   faddr, saddr, dport, inet->inet_sport);
 
+               if (!saddr && ipc.oif)
+                       l3mdev_get_saddr(net, ipc.oif, fl4);
+
                security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
                rt = ip_route_output_flow(net, fl4, sk);
                if (IS_ERR(rt)) {