There are no module parameters for this driver and it can be configured
using IProute2/ip utility.
- ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | L3 }
+ ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s }
e.g. ip link add link ipvl0 eth0 type ipvlan mode l2
used before packets are queued on the outbound device. In this mode the slaves
will not receive nor can send multicast / broadcast traffic.
+4.3 L3S mode:
+ This is very similar to the L3 mode except that iptables (conn-tracking)
+works in this mode and hence it is L3-symmetric (L3s). This will have slightly less
+performance but that shouldn't matter since you are choosing this mode over plain-L3
+mode to make conn-tracking work.
5. What to choose (macvlan vs. ipvlan)?
These two devices are very similar in many regards and the specific use
tristate "IP-VLAN support"
depends on INET
depends on IPV6
+ depends on NET_L3_MASTER_DEV
---help---
This allows one to create virtual devices off of a main interface
and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/inetdevice.h>
+#include <linux/netfilter.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/rtnetlink.h>
#include <net/route.h>
#include <net/addrconf.h>
+#include <net/l3mdev.h>
#define IPVLAN_DRV "ipvlan"
#define IPV_DRV_VER "0.1"
const void *iaddr, bool is_v6);
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
void ipvlan_ht_addr_del(struct ipvl_addr *addr);
+struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
+ u16 proto);
+unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
#endif /* __IPVLAN_H */
case IPVLAN_MODE_L2:
return ipvlan_xmit_mode_l2(skb, dev);
case IPVLAN_MODE_L3:
+ case IPVLAN_MODE_L3S:
return ipvlan_xmit_mode_l3(skb, dev);
}
return ipvlan_handle_mode_l2(pskb, port);
case IPVLAN_MODE_L3:
return ipvlan_handle_mode_l3(pskb, port);
+ case IPVLAN_MODE_L3S:
+ return RX_HANDLER_PASS;
}
/* Should not reach here */
kfree_skb(skb);
return RX_HANDLER_CONSUMED;
}
+
+static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ipvl_addr *addr = NULL;
+ struct ipvl_port *port;
+ void *lyr3h;
+ int addr_type;
+
+ if (!dev || !netif_is_ipvlan_port(dev))
+ goto out;
+
+ port = ipvlan_port_get_rcu(dev);
+ if (!port || port->mode != IPVLAN_MODE_L3S)
+ goto out;
+
+ lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
+ if (!lyr3h)
+ goto out;
+
+ addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+out:
+ return addr;
+}
+
+struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
+ u16 proto)
+{
+ struct ipvl_addr *addr;
+ struct net_device *sdev;
+
+ addr = ipvlan_skb_to_addr(skb, dev);
+ if (!addr)
+ goto out;
+
+ sdev = addr->master->dev;
+ switch (proto) {
+ case AF_INET:
+ {
+ int err;
+ struct iphdr *ip4h = ip_hdr(skb);
+
+ err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
+ ip4h->tos, sdev);
+ if (unlikely(err))
+ goto out;
+ break;
+ }
+ case AF_INET6:
+ {
+ struct dst_entry *dst;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct flowi6 fl6 = {
+ .flowi6_iif = sdev->ifindex,
+ .daddr = ip6h->daddr,
+ .saddr = ip6h->saddr,
+ .flowlabel = ip6_flowinfo(ip6h),
+ .flowi6_mark = skb->mark,
+ .flowi6_proto = ip6h->nexthdr,
+ };
+
+ skb_dst_drop(skb);
+ dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+ skb_dst_set(skb, dst);
+ break;
+ }
+ default:
+ break;
+ }
+
+out:
+ return skb;
+}
+
+unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct ipvl_addr *addr;
+ unsigned int len;
+
+ addr = ipvlan_skb_to_addr(skb, skb->dev);
+ if (!addr)
+ goto out;
+
+ skb->dev = addr->master->dev;
+ len = skb->len + ETH_HLEN;
+ ipvlan_count_rx(addr->master, len, true, false);
+out:
+ return NF_ACCEPT;
+}
#include "ipvlan.h"
+static u32 ipvl_nf_hook_refcnt = 0;
+
+static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
+ {
+ .hook = ipvlan_nf_input,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = INT_MAX,
+ },
+ {
+ .hook = ipvlan_nf_input,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = INT_MAX,
+ },
+};
+
+static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
+ .l3mdev_l3_rcv = ipvlan_l3_rcv,
+};
+
static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
{
ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
}
-static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
+static int ipvlan_register_nf_hook(void)
+{
+ int err = 0;
+
+ if (!ipvl_nf_hook_refcnt) {
+ err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
+ if (!err)
+ ipvl_nf_hook_refcnt = 1;
+ } else {
+ ipvl_nf_hook_refcnt++;
+ }
+
+ return err;
+}
+
+static void ipvlan_unregister_nf_hook(void)
+{
+ WARN_ON(!ipvl_nf_hook_refcnt);
+
+ ipvl_nf_hook_refcnt--;
+ if (!ipvl_nf_hook_refcnt)
+ _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
+}
+
+static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
{
struct ipvl_dev *ipvlan;
+ struct net_device *mdev = port->dev;
+ int err = 0;
+ ASSERT_RTNL();
if (port->mode != nval) {
+ if (nval == IPVLAN_MODE_L3S) {
+ /* New mode is L3S */
+ err = ipvlan_register_nf_hook();
+ if (!err) {
+ mdev->l3mdev_ops = &ipvl_l3mdev_ops;
+ mdev->priv_flags |= IFF_L3MDEV_MASTER;
+ } else
+ return err;
+ } else if (port->mode == IPVLAN_MODE_L3S) {
+ /* Old mode was L3S */
+ mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
+ ipvlan_unregister_nf_hook();
+ mdev->l3mdev_ops = NULL;
+ }
list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
- if (nval == IPVLAN_MODE_L3)
+ if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
ipvlan->dev->flags |= IFF_NOARP;
else
ipvlan->dev->flags &= ~IFF_NOARP;
}
port->mode = nval;
}
+ return err;
}
static int ipvlan_port_create(struct net_device *dev)
struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
dev->priv_flags &= ~IFF_IPVLAN_MASTER;
+ if (port->mode == IPVLAN_MODE_L3S) {
+ dev->priv_flags &= ~IFF_L3MDEV_MASTER;
+ ipvlan_unregister_nf_hook();
+ dev->l3mdev_ops = NULL;
+ }
netdev_rx_handler_unregister(dev);
cancel_work_sync(&port->wq);
__skb_queue_purge(&port->backlog);
struct net_device *phy_dev = ipvlan->phy_dev;
struct ipvl_addr *addr;
- if (ipvlan->port->mode == IPVLAN_MODE_L3)
+ if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
+ ipvlan->port->mode == IPVLAN_MODE_L3S)
dev->flags |= IFF_NOARP;
else
dev->flags &= ~IFF_NOARP;
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
+ int err = 0;
if (data && data[IFLA_IPVLAN_MODE]) {
u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
- ipvlan_set_port_mode(port, nmode);
+ err = ipvlan_set_port_mode(port, nmode);
}
- return 0;
+ return err;
}
static size_t ipvlan_nl_getsize(const struct net_device *dev)
unregister_netdevice(dev);
return err;
}
+ err = ipvlan_set_port_mode(port, mode);
+ if (err) {
+ unregister_netdevice(dev);
+ return err;
+ }
list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
- ipvlan_set_port_mode(port, mode);
-
netif_stacked_transfer_operstate(phy_dev, dev);
return 0;
}
void nf_unregister_hook(struct nf_hook_ops *reg);
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
+int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
+void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
/* Functions to register get/setsockopt ranges (non-inclusive). You
need to check permissions yourself! */
}
void ip6_route_input(struct sk_buff *skb);
+struct dst_entry *ip6_route_input_lookup(struct net *net,
+ struct net_device *dev,
+ struct flowi6 *fl6, int flags);
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags);
enum ipvlan_mode {
IPVLAN_MODE_L2 = 0,
IPVLAN_MODE_L3,
+ IPVLAN_MODE_L3S,
IPVLAN_MODE_MAX
};
return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
-static struct dst_entry *ip6_route_input_lookup(struct net *net,
- struct net_device *dev,
- struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_input_lookup(struct net *net,
+ struct net_device *dev,
+ struct flowi6 *fl6, int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
+EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
void ip6_route_input(struct sk_buff *skb)
{
static LIST_HEAD(nf_hook_list);
-int nf_register_hook(struct nf_hook_ops *reg)
+static int _nf_register_hook(struct nf_hook_ops *reg)
{
struct net *net, *last;
int ret;
- rtnl_lock();
for_each_net(net) {
ret = nf_register_net_hook(net, reg);
if (ret && ret != -ENOENT)
goto rollback;
}
list_add_tail(®->list, &nf_hook_list);
- rtnl_unlock();
return 0;
rollback:
break;
nf_unregister_net_hook(net, reg);
}
+ return ret;
+}
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = _nf_register_hook(reg);
rtnl_unlock();
+
return ret;
}
EXPORT_SYMBOL(nf_register_hook);
-void nf_unregister_hook(struct nf_hook_ops *reg)
+static void _nf_unregister_hook(struct nf_hook_ops *reg)
{
struct net *net;
- rtnl_lock();
list_del(®->list);
for_each_net(net)
nf_unregister_net_hook(net, reg);
+}
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+ rtnl_lock();
+ _nf_unregister_hook(reg);
rtnl_unlock();
}
EXPORT_SYMBOL(nf_unregister_hook);
}
EXPORT_SYMBOL(nf_register_hooks);
+/* Caller MUST take rtnl_lock() */
+int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = _nf_register_hook(®[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ _nf_unregister_hooks(reg, i);
+ return err;
+}
+EXPORT_SYMBOL(_nf_register_hooks);
+
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
{
while (n-- > 0)
}
EXPORT_SYMBOL(nf_unregister_hooks);
+/* Caller MUST take rtnl_lock */
+void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
+{
+ while (n-- > 0)
+ _nf_unregister_hook(®[n]);
+}
+EXPORT_SYMBOL(_nf_unregister_hooks);
+
unsigned int nf_iterate(struct list_head *head,
struct sk_buff *skb,
struct nf_hook_state *state,