#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
-#include <linux/types.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
-#include <linux/skbuff.h>
-#include <linux/rculist.h>
-#include <linux/netdevice.h>
-#include <linux/in.h>
-#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/igmp.h>
-#include <linux/etherdevice.h>
#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <linux/hash.h>
#include <linux/ethtool.h>
#include <net/arp.h>
#include <net/ndisc.h>
#include <net/ip.h>
-#include <net/ip_tunnels.h>
#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/udp_tunnel.h>
#include <net/rtnetlink.h>
-#include <net/route.h>
-#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/vxlan.h>
-#include <net/protocol.h>
#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/ip6_checksum.h>
#endif
-#include <net/dst_metadata.h>
#define VXLAN_VERSION "0.1"
if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
nla_put_s32(skb, NDA_LINK_NETNSID,
- peernet2id_alloc(dev_net(vxlan->dev), vxlan->net)))
+ peernet2id(dev_net(vxlan->dev), vxlan->net)))
goto nla_put_failure;
if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
-/* Notify netdevs that UDP port started listening */
-static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
-{
- struct net_device *dev;
- struct sock *sk = vs->sock->sk;
- struct net *net = sock_net(sk);
- sa_family_t sa_family = vxlan_get_sk_family(vs);
- __be16 port = inet_sk(sk)->inet_sport;
-
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- if (dev->netdev_ops->ndo_add_vxlan_port)
- dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
- port);
- }
- rcu_read_unlock();
-}
-
-/* Notify netdevs that UDP port is no more listening */
-static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
-{
- struct net_device *dev;
- struct sock *sk = vs->sock->sk;
- struct net *net = sock_net(sk);
- sa_family_t sa_family = vxlan_get_sk_family(vs);
- __be16 port = inet_sk(sk)->inet_sport;
-
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- if (dev->netdev_ops->ndo_del_vxlan_port)
- dev->netdev_ops->ndo_del_vxlan_port(dev, sa_family,
- port);
- }
- rcu_read_unlock();
-}
-
/* Add new entry to forwarding table -- assumes lock held */
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
/* Dump forwarding table */
static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
- struct net_device *filter_dev, int idx)
+ struct net_device *filter_dev, int *idx)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
unsigned int h;
+ int err = 0;
for (h = 0; h < FDB_HASH_SIZE; ++h) {
struct vxlan_fdb *f;
- int err;
hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
struct vxlan_rdst *rd;
list_for_each_entry_rcu(rd, &f->remotes, list) {
- if (idx < cb->args[0])
+ if (*idx < cb->args[2])
goto skip;
err = vxlan_fdb_info(skb, vxlan, f,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI, rd);
- if (err < 0) {
- cb->args[1] = err;
+ if (err < 0)
goto out;
- }
skip:
- ++idx;
+ *idx += 1;
}
}
}
out:
- return idx;
+ return err;
}
/* Watch incoming packets to learn mapping between Ethernet address
vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
- vxlan_notify_del_rx_port(vs);
+ udp_tunnel_notify_del_rx_port(vs->sock,
+ (vs->flags & VXLAN_F_GPE) ?
+ UDP_TUNNEL_TYPE_VXLAN_GPE :
+ UDP_TUNNEL_TYPE_VXLAN);
spin_unlock(&vn->sock_lock);
return true;
struct metadata_dst *tun_dst;
tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
- vxlan_vni_to_tun_id(vni), sizeof(*md));
+ key32_to_tunnel_id(vni), sizeof(*md));
if (!tun_dst)
goto drop;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.daddr = daddr;
- fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
+ fl4.saddr = *saddr;
rt = ip_route_output_key(vxlan->net, &fl4);
if (!IS_ERR(rt)) {
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.daddr = *daddr;
- fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+ fl6.saddr = *saddr;
fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = IPPROTO_UDP;
struct rtable *rt = NULL;
const struct iphdr *old_iph;
union vxlan_addr *dst;
- union vxlan_addr remote_ip;
+ union vxlan_addr remote_ip, local_ip;
+ union vxlan_addr *src;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni;
dst = &rdst->remote_ip;
+ src = &vxlan->cfg.saddr;
dst_cache = &rdst->dst_cache;
} else {
if (!info) {
goto drop;
}
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
- vni = vxlan_tun_id_to_vni(info->key.tun_id);
+ vni = tunnel_id_to_key32(info->key.tun_id);
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
- if (remote_ip.sa.sa_family == AF_INET)
+ if (remote_ip.sa.sa_family == AF_INET) {
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
- else
+ local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
+ } else {
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
+ local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
+ }
dst = &remote_ip;
+ src = &local_ip;
dst_cache = &info->dst_cache;
}
}
if (dst->sa.sa_family == AF_INET) {
- __be32 saddr;
-
if (!vxlan->vn4_sock)
goto drop;
sk = vxlan->vn4_sock->sock->sk;
rt = vxlan_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
- dst->sin.sin_addr.s_addr, &saddr,
+ dst->sin.sin_addr.s_addr,
+ &src->sin.sin_addr.s_addr,
dst_cache, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n",
}
/* Bypass encapsulation if the destination is local */
- if (rt->rt_flags & RTCF_LOCAL &&
+ if (!info && rt->rt_flags & RTCF_LOCAL &&
!(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
struct vxlan_dev *dst_vxlan;
if (err < 0)
goto xmit_tx_error;
- udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+ udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
dst->sin.sin_addr.s_addr, tos, ttl, df,
src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct dst_entry *ndst;
- struct in6_addr saddr;
u32 rt6i_flags;
if (!vxlan->vn6_sock)
ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
- label, &dst->sin6.sin6_addr, &saddr,
+ label, &dst->sin6.sin6_addr,
+ &src->sin6.sin6_addr,
dst_cache, info);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
/* Bypass encapsulation if the destination is local */
rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
- if (rt6i_flags & RTF_LOCAL &&
+ if (!info && rt6i_flags & RTF_LOCAL &&
!(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
struct vxlan_dev *dst_vxlan;
vni, md, flags, udp_sum);
if (err < 0) {
dst_release(ndst);
+ dev->stats.tx_errors++;
return;
}
udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
- &saddr, &dst->sin6.sin6_addr, tos, ttl,
+ &src->sin6.sin6_addr,
+ &dst->sin6.sin6_addr, tos, ttl,
label, src_port, dst_port, !udp_sum);
#endif
}
.name = "vxlan",
};
-/* Calls the ndo_add_vxlan_port of the caller in order to
+/* Calls the ndo_udp_tunnel_add of the caller in order to
* supply the listening VXLAN udp ports. Callers are expected
- * to implement the ndo_add_vxlan_port.
+ * to implement the ndo_udp_tunnel_add.
*/
static void vxlan_push_rx_ports(struct net_device *dev)
{
struct vxlan_sock *vs;
struct net *net = dev_net(dev);
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- sa_family_t sa_family;
- __be16 port;
unsigned int i;
- if (!dev->netdev_ops->ndo_add_vxlan_port)
- return;
-
spin_lock(&vn->sock_lock);
for (i = 0; i < PORT_HASH_SIZE; ++i) {
- hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
- port = inet_sk(vs->sock->sk)->inet_sport;
- sa_family = vxlan_get_sk_family(vs);
- dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
- port);
- }
+ hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist)
+ udp_tunnel_push_rx_port(dev, vs->sock,
+ (vs->flags & VXLAN_F_GPE) ?
+ UDP_TUNNEL_TYPE_VXLAN_GPE :
+ UDP_TUNNEL_TYPE_VXLAN);
}
spin_unlock(&vn->sock_lock);
}
spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
- vxlan_notify_add_rx_port(vs);
+ udp_tunnel_notify_add_rx_port(sock,
+ (vs->flags & VXLAN_F_GPE) ?
+ UDP_TUNNEL_TYPE_VXLAN_GPE :
+ UDP_TUNNEL_TYPE_VXLAN);
spin_unlock(&vn->sock_lock);
/* Mark socket as an encapsulation socket. */
struct net_device *lowerdev = NULL;
if (conf->flags & VXLAN_F_GPE) {
- if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
- return -EINVAL;
/* For now, allow GPE only together with COLLECT_METADATA.
* This can be relaxed later; in such case, the other side
* of the PtP link will have to be provided.
*/
- if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
+ if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
+ !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+ pr_info("unsupported combination of extensions\n");
return -EINVAL;
+ }
vxlan_raw_setup(dev);
} else {
dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
needed_headroom = lowerdev->hard_header_len;
+ } else if (vxlan_addr_multicast(&dst->remote_ip)) {
+ pr_info("multicast destination requires interface to be specified\n");
+ return -EINVAL;
}
if (conf->mtu) {
tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
tmp->cfg.dst_port == vxlan->cfg.dst_port &&
(tmp->flags & VXLAN_F_RCV_FLAGS) ==
- (vxlan->flags & VXLAN_F_RCV_FLAGS))
- return -EEXIST;
+ (vxlan->flags & VXLAN_F_RCV_FLAGS)) {
+ pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni));
+ return -EEXIST;
+ }
}
dev->ethtool_ops = &vxlan_ethtool_ops;
struct nlattr *tb[], struct nlattr *data[])
{
struct vxlan_config conf;
- int err;
memset(&conf, 0, sizeof(conf));
if (tb[IFLA_MTU])
conf.mtu = nla_get_u32(tb[IFLA_MTU]);
- err = vxlan_dev_configure(src_net, dev, &conf);
- switch (err) {
- case -ENODEV:
- pr_info("ifindex %d does not exist\n", conf.remote_ifindex);
- break;
-
- case -EPERM:
- pr_info("IPv6 is disabled via sysctl\n");
- break;
-
- case -EEXIST:
- pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
- break;
-
- case -EINVAL:
- pr_info("unsupported combination of extensions\n");
- break;
- }
-
- return err;
+ return vxlan_dev_configure(src_net, dev, &conf);
}
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
if (event == NETDEV_UNREGISTER)
vxlan_handle_lowerdev_unregister(vn, dev);
- else if (event == NETDEV_OFFLOAD_PUSH_VXLAN)
+ else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
vxlan_push_rx_ports(dev);
return NOTIFY_DONE;