PORTING.md \
README.md \
README-lisp.md \
+ README-native-tunneling.md \
REPORTING-BUGS.md \
TODO.md \
.travis.yml \
- A simple wrapper script, 'ovs-docker', to integrate OVS with Docker
containers. If and when there is a native integration of Open vSwitch
with Docker, the wrapper script will be retired.
+ - Added support for DPDK Tunneling. VXLAN and GRE are supported protocols.
+  This is a generic tunneling mechanism for the userspace datapath.
v2.3.0 - 14 Aug 2014
--- /dev/null
+
+Open vSwitch supports tunneling in userspace. Tunneling is implemented in a
+platform-independent way.
+
+Setup:
+======
+Set up physical bridges for all physical interfaces. Create an integration bridge.
+Add VXLAN port to int-bridge. Assign IP address to physical bridge where
+VXLAN traffic is expected.
+
+Example:
+========
+Connect to VXLAN tunnel endpoint logical ip: 192.168.1.2 and 192.168.1.1.
+
+Configure OVS bridges as follows.
+
+1. Let's assume the 172.168.1.2/24 network is reachable via eth1. Create physical bridge br-eth1,
+   assign IP address (172.168.1.1/24) to br-eth1, and add eth1 to br-eth1.
+2. Check ovs cached routes using appctl command
+ ovs-appctl ovs/route/show
+ Add tunnel route if not present in OVS route table.
+ ovs-appctl ovs/route/add 172.168.1.1/24 br-eth1
+3. Add integration bridge int-br and add the tunnel port using standard syntax.
+ ovs-vsctl add-port int-br vxlan0 -- set interface vxlan0 type=vxlan options:remote_ip=172.168.1.2
+4. Assign an IP address to int-br, so the final topology looks like:
+
+
+ 192.168.1.1/24
+ +--------------+
+ | int-br | 192.168.1.2/24
+ +--------------+ +--------------+
+ | vxlan0 | | vxlan0 |
+ +--------------+ +--------------+
+ | |
+ | |
+ | |
+ 172.168.1.1/24 |
+ +--------------+ |
+ | br-eth1 | 172.168.1.2/24
+ +--------------+ +---------------+
+ | eth1 |----------------------------------| eth1 |
+  +--------------+                                  +---------------+
+
+ Host A with OVS. Remote host.
+
+With this setup, ping to VXLAN target device (192.168.1.2) should work.
+The following commands show internal tables:
+
+Tunneling related commands:
+===========================
+Tunnel routing table:
+  To add a route:
+ ovs-appctl ovs/route/add <IP address>/<prefix length> <output-bridge-name> <gw>
+ To see all routes configured:
+ ovs-appctl ovs/route/show
+  To delete a route:
+ ovs-appctl ovs/route/del <IP address>/<prefix length>
+
+ARP:
+ To see arp cache content:
+ ovs-appctl tnl/arp/show
+ To flush arp cache:
+ ovs-appctl tnl/arp/flush
+
+To check tunnel ports listening in vswitchd:
+ ovs-appctl tnl/ports/show
+
+To set the range for the VXLAN UDP source port:
+ To set:
+ ovs-appctl tnl/egress_port_range <num1> <num2>
+  To show the current range:
+ ovs-appctl tnl/egress_port_range
+
+To check datapath ports:
+ ovs-appctl dpif/show
+
+To check datapath flows:
+ ovs-appctl dpif/dump-flows
+
+Contact
+=======
+bugs@openvswitch.org
uint32_t hash_basis;
};
+#ifndef __KERNEL__
+#define TNL_PUSH_HEADER_SIZE 128
+
+/*
+ * struct ovs_action_push_tnl - %OVS_ACTION_ATTR_TUNNEL_PUSH
+ * @tnl_port: To identify tunnel port to pass header info.
+ * @out_port: Physical port to send encapsulated packet.
+ * @header_len: Length of the header to be pushed.
+ * @tnl_type: Tunnel type; only required to format this header.  Otherwise
+ * the ODP layer cannot parse %header.
+ * @header: Partial header for the tunnel. Tunnel push action can use
+ * this header to build final header according to actual packet parameters.
+ */
+struct ovs_action_push_tnl {
+ uint32_t tnl_port;
+ uint32_t out_port;
+ uint32_t header_len;
+ uint32_t tnl_type; /* For logging. */
+ uint8_t header[TNL_PUSH_HEADER_SIZE];
+};
+#endif
+
/**
* enum ovs_action_attr - Action types.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
* type may not be changed.
+ *
+ * @OVS_ACTION_ATTR_TUNNEL_PUSH: Push tunnel header described by struct
+ * ovs_action_push_tnl.
+ * @OVS_ACTION_ATTR_TUNNEL_POP: Lookup tunnel port by port-no passed and pop
+ * tunnel header.
*/
enum ovs_action_attr {
* The data must be zero for the unmasked
* bits. */
+#ifndef __KERNEL__
+ OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/
+ OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */
+#endif
__OVS_ACTION_ATTR_MAX
};
FAQ.md
INSTALL.DPDK.md
+README-native-tunneling.md
lib/timer.h \
lib/timeval.c \
lib/timeval.h \
+ lib/tnl-arp-cache.c \
+ lib/tnl-arp-cache.h \
+ lib/tnl-ports.c \
+ lib/tnl-ports.h \
lib/token-bucket.c \
lib/token-bucket.h \
lib/type-props.h \
#include "shash.h"
#include "sset.h"
#include "timeval.h"
+#include "tnl-arp-cache.h"
#include "unixctl.h"
#include "util.h"
#include "vlog.h"
* for pin of pmd threads. */
size_t n_dpdk_rxqs;
char *pmd_cmask;
+ uint64_t last_tnl_conf_seq;
};
static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
return error;
}
+ dp->last_tnl_conf_seq = seq_read(tnl_conf_seq);
*dpp = dp;
return 0;
}
}
}
-static void
+/* Returns true if the datapath flows need to be revalidated. */
+static bool
dpif_netdev_run(struct dpif *dpif)
{
struct dp_netdev_port *port;
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_pmd_thread *non_pmd = dp_netdev_get_nonpmd(dp);
+ uint64_t new_tnl_seq;
ovs_mutex_lock(&dp->non_pmd_mutex);
CMAP_FOR_EACH (port, node, &dp->ports) {
}
}
ovs_mutex_unlock(&dp->non_pmd_mutex);
+ tnl_arp_cache_run();
+ new_tnl_seq = seq_read(tnl_conf_seq);
+
+ if (dp->last_tnl_conf_seq != new_tnl_seq) {
+ dp->last_tnl_conf_seq = new_tnl_seq;
+ return true;
+ }
+ return false;
}
static void
}
}
ovs_mutex_unlock(&dp_netdev_mutex);
+ seq_wait(tnl_conf_seq, dp->last_tnl_conf_seq);
}
struct rxq_poll {
static void
dp_netdev_drop_packets(struct dpif_packet ** packets, int cnt, bool may_steal)
{
- int i;
-
if (may_steal) {
+ int i;
+
for (i = 0; i < cnt; i++) {
dpif_packet_delete(packets[i]);
}
}
}
+static int
+push_tnl_action(const struct dp_netdev *dp,
+ const struct nlattr *attr,
+ struct dpif_packet **packets, int cnt)
+{
+ struct dp_netdev_port *tun_port;
+ const struct ovs_action_push_tnl *data;
+
+ data = nl_attr_get(attr);
+
+ tun_port = dp_netdev_lookup_port(dp, u32_to_odp(data->tnl_port));
+ if (!tun_port) {
+ return -EINVAL;
+ }
+ netdev_push_header(tun_port->netdev, packets, cnt, data);
+
+ return 0;
+}
+
+static void
+dp_netdev_clone_pkt_batch(struct dpif_packet **tnl_pkt,
+ struct dpif_packet **packets, int cnt)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ tnl_pkt[i] = dpif_packet_clone(packets[i]);
+ }
+}
+
static void
dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt,
const struct nlattr *a, bool may_steal)
}
break;
+ case OVS_ACTION_ATTR_TUNNEL_PUSH:
+ if (*depth < MAX_RECIRC_DEPTH) {
+ struct dpif_packet *tnl_pkt[NETDEV_MAX_RX_BATCH];
+ int err;
+
+ if (!may_steal) {
+ dp_netdev_clone_pkt_batch(tnl_pkt, packets, cnt);
+ packets = tnl_pkt;
+ }
+
+ err = push_tnl_action(dp, a, packets, cnt);
+ if (!err) {
+ (*depth)++;
+ dp_netdev_input(pmd, packets, cnt);
+ (*depth)--;
+ } else {
+ dp_netdev_drop_packets(tnl_pkt, cnt, !may_steal);
+ }
+ return;
+ }
+ break;
+
+ case OVS_ACTION_ATTR_TUNNEL_POP:
+ if (*depth < MAX_RECIRC_DEPTH) {
+ odp_port_t portno = u32_to_odp(nl_attr_get_u32(a));
+
+ p = dp_netdev_lookup_port(dp, portno);
+ if (p) {
+ struct dpif_packet *tnl_pkt[NETDEV_MAX_RX_BATCH];
+ int err;
+
+ if (!may_steal) {
+ dp_netdev_clone_pkt_batch(tnl_pkt, packets, cnt);
+ packets = tnl_pkt;
+ }
+
+ err = netdev_pop_header(p->netdev, packets, cnt);
+ if (!err) {
+
+ for (i = 0; i < cnt; i++) {
+ packets[i]->md.in_port.odp_port = portno;
+ }
+
+ (*depth)++;
+ dp_netdev_input(pmd, packets, cnt);
+ (*depth)--;
+ } else {
+ dp_netdev_drop_packets(tnl_pkt, cnt, !may_steal);
+ }
+ return;
+ }
+ }
+ break;
+
case OVS_ACTION_ATTR_USERSPACE:
if (!fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
const struct nlattr *userdata;
return dpif_netlink_dp_transact(&dp, NULL, NULL);
}
-static void
+static bool
dpif_netlink_run(struct dpif *dpif_)
{
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
fat_rwlock_unlock(&dpif->upcall_lock);
}
+ return false;
}
static int
* the 'close' member function. */
int (*destroy)(struct dpif *dpif);
- /* Performs periodic work needed by 'dpif', if any is necessary. */
- void (*run)(struct dpif *dpif);
+ /* Performs periodic work needed by 'dpif', if any is necessary.
+     * Returns true if the datapath flows need to be revalidated. */
+ bool (*run)(struct dpif *dpif);
/* Arranges for poll_block() to wake up if the "run" member function needs
* to be called for 'dpif'. */
#include "shash.h"
#include "sset.h"
#include "timeval.h"
+#include "tnl-arp-cache.h"
+#include "tnl-ports.h"
#include "util.h"
#include "valgrind.h"
#include "vlog.h"
}
dpctl_unixctl_register();
ovsthread_once_done(&once);
+ tnl_port_map_init();
+ tnl_arp_cache_init();
}
}
}
/* Performs periodic work needed by 'dpif'. */
-void
+bool
dpif_run(struct dpif *dpif)
{
if (dpif->dpif_class->run) {
- dpif->dpif_class->run(dpif);
+ return dpif->dpif_class->run(dpif);
}
+ return false;
}
/* Arranges for poll_block() to wake up when dp_run() needs to be called for
switch ((enum ovs_action_attr)type) {
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_TUNNEL_PUSH:
+ case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC: {
struct dpif_execute execute;
get->flow->actions, get->flow->actions_len);
}
}
+
+bool
+dpif_supports_tnl_push_pop(const struct dpif *dpif)
+{
+ return !strcmp(dpif->dpif_class->type, "netdev") ||
+ !strcmp(dpif->dpif_class->type, "dummy");
+}
int dpif_create_and_open(const char *name, const char *type, struct dpif **);
void dpif_close(struct dpif *);
-void dpif_run(struct dpif *);
+bool dpif_run(struct dpif *);
void dpif_wait(struct dpif *);
const char *dpif_name(const struct dpif *);
uint32_t *priority);
char *dpif_get_dp_version(const struct dpif *);
+bool dpif_supports_tnl_push_pop(const struct dpif *);
#ifdef __cplusplus
}
#endif
NULL, /* get_config */ \
NULL, /* set_config */ \
NULL, /* get_tunnel_config */ \
+ NULL, /* build header */ \
+ NULL, /* push header */ \
+ NULL, /* pop header */ \
NULL, /* get_numa_id */ \
NULL, /* set_multiq */ \
\
netdev_dpdk_get_config, \
NULL, /* netdev_dpdk_set_config */ \
NULL, /* get_tunnel_config */ \
+ NULL, /* build header */ \
+ NULL, /* push header */ \
+ NULL, /* pop header */ \
netdev_dpdk_get_numa_id, /* get_numa_id */ \
MULTIQ, /* set_multiq */ \
\
FILE *tx_pcap, *rxq_pcap OVS_GUARDED;
+ struct in_addr address, netmask;
struct list rxes OVS_GUARDED; /* List of child "netdev_rxq_dummy"s. */
};
return 0;
}
+static int
+netdev_dummy_get_in4(const struct netdev *netdev_,
+ struct in_addr *address, struct in_addr *netmask)
+{
+ struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
+
+ ovs_mutex_lock(&netdev->mutex);
+ *address = netdev->address;
+ *netmask = netdev->netmask;
+ ovs_mutex_unlock(&netdev->mutex);
+ return 0;
+}
+
+static int
+netdev_dummy_set_in4(struct netdev *netdev_, struct in_addr address,
+ struct in_addr netmask)
+{
+ struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
+
+ ovs_mutex_lock(&netdev->mutex);
+ netdev->address = address;
+ netdev->netmask = netmask;
+ ovs_mutex_unlock(&netdev->mutex);
+
+ return 0;
+}
+
static int
netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args)
{
netdev_dummy_get_config,
netdev_dummy_set_config,
NULL, /* get_tunnel_config */
+ NULL, /* build header */
+ NULL, /* push header */
+ NULL, /* pop header */
NULL, /* get_numa_id */
NULL, /* set_multiq */
NULL, /* queue_dump_done */
NULL, /* dump_queue_stats */
- NULL, /* get_in4 */
+ netdev_dummy_get_in4, /* get_in4 */
NULL, /* set_in4 */
NULL, /* get_in6 */
NULL, /* add_router */
ds_destroy(&s);
}
+static void
+netdev_dummy_ip4addr(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[], void *aux OVS_UNUSED)
+{
+ struct netdev *netdev = netdev_from_name(argv[1]);
+
+ if (netdev && is_dummy_class(netdev->netdev_class)) {
+ struct in_addr ip;
+ uint16_t plen;
+
+ if (ovs_scan(argv[2], IP_SCAN_FMT"/%"SCNi16,
+ IP_SCAN_ARGS(&ip.s_addr), &plen)) {
+ struct in_addr mask;
+
+ mask.s_addr = be32_prefix_mask(plen);
+ netdev_dummy_set_in4(netdev, ip, mask);
+ unixctl_command_reply(conn, "OK");
+ } else {
+ unixctl_command_reply(conn, "Invalid parameters");
+ }
+
+ netdev_close(netdev);
+ } else {
+ unixctl_command_reply_error(conn, "Unknown Dummy Interface");
+ netdev_close(netdev);
+ return;
+ }
+
+}
+
void
netdev_dummy_register(bool override)
{
unixctl_command_register("netdev-dummy/conn-state",
"[netdev]", 0, 1,
netdev_dummy_conn_state, NULL);
+ unixctl_command_register("netdev-dummy/ip4addr",
+ "[netdev] ipaddr/mask-prefix-len", 2, 2,
+ netdev_dummy_ip4addr, NULL);
+
if (override) {
struct sset types;
NULL, /* get_config */ \
NULL, /* set_config */ \
NULL, /* get_tunnel_config */ \
+ NULL, /* build header */ \
+ NULL, /* push header */ \
+ NULL, /* pop header */ \
NULL, /* get_numa_id */ \
NULL, /* set_multiq */ \
\
const struct netdev_tunnel_config *
(*get_tunnel_config)(const struct netdev *netdev);
+    /* Build partial tunnel header.  Ethernet and IP headers are already built;
+     * build_header() is supposed to build the protocol-specific part. */
+ int (*build_header)(const struct netdev *, struct ovs_action_push_tnl *data);
+
+    /* build_header() cannot build the entire header for all packets of a
+     * given flow.  push_header() is called per packet on actual transmit to
+     * build the packet-specific header, using the partial header built by
+     * build_header(), which is passed in as 'data'. */
+ int (*push_header)(const struct netdev *netdev,
+ struct dpif_packet **buffers, int cnt,
+ const struct ovs_action_push_tnl *data);
+
+ /* Pop tunnel header from packet, build tunnel metadata and resize packet
+ * for further processing. */
+ int (*pop_header)(struct netdev *netdev,
+ struct dpif_packet **buffers, int cnt);
+
/* Returns the id of the numa node the 'netdev' is on. If there is no
* such info, returns NETDEV_NUMA_UNSPEC. */
int (*get_numa_id)(const struct netdev *netdev);
#include <sys/ioctl.h>
#include "byte-order.h"
+#include "csum.h"
#include "daemon.h"
#include "dirs.h"
#include "dpif.h"
+#include "dynamic-string.h"
+#include "flow.h"
#include "hash.h"
#include "hmap.h"
#include "list.h"
#include "netdev-provider.h"
+#include "odp-netlink.h"
#include "ofpbuf.h"
#include "ovs-router.h"
#include "packets.h"
+#include "packet-dpif.h"
#include "poll-loop.h"
#include "route-table.h"
#include "shash.h"
#include "socket-util.h"
#include "vlog.h"
+#include "unaligned.h"
+#include "unixctl.h"
+#include "util.h"
VLOG_DEFINE_THIS_MODULE(netdev_vport);
+static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
#define GENEVE_DST_PORT 6081
#define VXLAN_DST_PORT 4789
#define LISP_DST_PORT 4341
+#define VXLAN_HLEN (sizeof(struct eth_header) + \
+ sizeof(struct ip_header) + \
+ sizeof(struct udp_header) + \
+ sizeof(struct vxlanhdr))
+
#define DEFAULT_TTL 64
struct netdev_vport {
static int get_tunnel_config(const struct netdev *, struct smap *args);
static bool tunnel_check_status_change__(struct netdev_vport *);
+static uint16_t tnl_udp_port_min = 32768;
+static uint16_t tnl_udp_port_max = 61000;
+
static bool
is_vport_class(const struct netdev_class *class)
{
static int
netdev_vport_construct(struct netdev *netdev_)
{
- struct netdev_vport *netdev = netdev_vport_cast(netdev_);
+ struct netdev_vport *dev = netdev_vport_cast(netdev_);
+ const char *type = netdev_get_type(netdev_);
- ovs_mutex_init(&netdev->mutex);
- eth_addr_random(netdev->etheraddr);
+ ovs_mutex_init(&dev->mutex);
+ eth_addr_random(dev->etheraddr);
+
+ /* Add a default destination port for tunnel ports if none specified. */
+ if (!strcmp(type, "geneve")) {
+ dev->tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
+ } else if (!strcmp(type, "vxlan")) {
+ dev->tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
+ } else if (!strcmp(type, "lisp")) {
+ dev->tnl_cfg.dst_port = htons(LISP_DST_PORT);
+ }
route_table_register();
ipsec_mech_set = false;
memset(&tnl_cfg, 0, sizeof tnl_cfg);
+ /* Add a default destination port for tunnel ports if none specified. */
+ if (!strcmp(type, "geneve")) {
+ tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
+ }
+
+ if (!strcmp(type, "vxlan")) {
+ tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
+ }
+
+ if (!strcmp(type, "lisp")) {
+ tnl_cfg.dst_port = htons(LISP_DST_PORT);
+ }
+
needs_dst_port = netdev_vport_needs_dst_port(dev_);
tnl_cfg.ipsec = strstr(type, "ipsec");
tnl_cfg.dont_fragment = true;
}
}
- /* Add a default destination port for tunnel ports if none specified. */
- if (!strcmp(type, "geneve") && !tnl_cfg.dst_port) {
- tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
- }
-
- if (!strcmp(type, "vxlan") && !tnl_cfg.dst_port) {
- tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
- }
-
- if (!strcmp(type, "lisp") && !tnl_cfg.dst_port) {
- tnl_cfg.dst_port = htons(LISP_DST_PORT);
- }
-
if (tnl_cfg.ipsec) {
static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
static pid_t pid = 0;
return 0;
}
+
+\f
+/* Tunnel push pop ops. */
+
+static struct ip_header *
+ip_hdr(void *eth)
+{
+ return (void *)((char *)eth + sizeof (struct eth_header));
+}
+
+static struct gre_base_hdr *
+gre_hdr(struct ip_header *ip)
+{
+ return (void *)((char *)ip + sizeof (struct ip_header));
+}
+
+static void *
+ip_extract_tnl_md(struct ofpbuf *packet, struct flow_tnl *tnl)
+{
+ struct ip_header *nh;
+ void *l4;
+
+ nh = ofpbuf_l3(packet);
+ l4 = ofpbuf_l4(packet);
+
+ if (!nh || !l4) {
+ return NULL;
+ }
+
+ tnl->ip_src = get_16aligned_be32(&nh->ip_src);
+ tnl->ip_dst = get_16aligned_be32(&nh->ip_dst);
+ tnl->ip_tos = nh->ip_tos;
+
+ return l4;
+}
+
+/* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
+ * reallocating the packet if necessary. 'header' should contain an Ethernet
+ * header, followed by an IPv4 header (without options), and an L4 header.
+ *
+ * This function sets the IP header's ip_tot_len field (which should be zeroed
+ * as part of 'header') and puts its value into '*ip_tot_size' as well. Also
+ * updates IP header checksum.
+ *
+ * Return pointer to the L4 header added to 'packet'. */
+static void *
+push_ip_header(struct ofpbuf *packet,
+ const void *header, int size, int *ip_tot_size)
+{
+ struct eth_header *eth;
+ struct ip_header *ip;
+
+ eth = ofpbuf_push_uninit(packet, size);
+ *ip_tot_size = ofpbuf_size(packet) - sizeof (struct eth_header);
+
+ memcpy(eth, header, size);
+ ip = ip_hdr(eth);
+ ip->ip_tot_len = htons(*ip_tot_size);
+
+
+ ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
+
+ return ip + 1;
+}
+
+static int
+gre_header_len(ovs_be16 flags)
+{
+ int hlen = sizeof(struct eth_header) +
+ sizeof(struct ip_header) + 4;
+
+ if (flags & htons(GRE_CSUM)) {
+ hlen += 4;
+ }
+ if (flags & htons(GRE_KEY)) {
+ hlen += 4;
+ }
+ if (flags & htons(GRE_SEQ)) {
+ hlen += 4;
+ }
+ return hlen;
+}
+
+static int
+parse_gre_header(struct ofpbuf *packet,
+ struct flow_tnl *tnl)
+{
+ const struct gre_base_hdr *greh;
+ ovs_16aligned_be32 *options;
+ int hlen;
+
+ greh = ip_extract_tnl_md(packet, tnl);
+ if (!greh) {
+ return -EINVAL;
+ }
+
+ if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
+ return -EINVAL;
+ }
+
+ hlen = gre_header_len(greh->flags);
+ if (hlen > ofpbuf_size(packet)) {
+ return -EINVAL;
+ }
+
+ options = (ovs_16aligned_be32 *)(greh + 1);
+ if (greh->flags & htons(GRE_CSUM)) {
+ ovs_be16 pkt_csum;
+
+ pkt_csum = csum(greh, ofpbuf_size(packet) -
+ ((const unsigned char *)greh -
+ (const unsigned char *)ofpbuf_l2(packet)));
+ if (pkt_csum) {
+ return -EINVAL;
+ }
+ tnl->flags = FLOW_TNL_F_CSUM;
+ options++;
+ }
+
+ if (greh->flags & htons(GRE_KEY)) {
+ tnl->tun_id = (OVS_FORCE ovs_be64) ((OVS_FORCE uint64_t)(get_16aligned_be32(options)) << 32);
+ tnl->flags |= FLOW_TNL_F_KEY;
+ options++;
+ }
+
+ if (greh->flags & htons(GRE_SEQ)) {
+ options++;
+ }
+
+ return hlen;
+}
+
+static void
+reset_tnl_md(struct pkt_metadata *md)
+{
+ memset(&md->tunnel, 0, sizeof(md->tunnel));
+}
+
+static void
+gre_extract_md(struct dpif_packet *dpif_pkt)
+{
+ struct ofpbuf *packet = &dpif_pkt->ofpbuf;
+ struct pkt_metadata *md = &dpif_pkt->md;
+ struct flow_tnl *tnl = &md->tunnel;
+ int hlen = sizeof(struct eth_header) +
+ sizeof(struct ip_header) + 4;
+
+ memset(md, 0, sizeof *md);
+ if (hlen > ofpbuf_size(packet)) {
+ return;
+ }
+
+ hlen = parse_gre_header(packet, tnl);
+ if (hlen < 0) {
+ reset_tnl_md(md);
+ }
+
+ ofpbuf_reset_packet(packet, hlen);
+}
+
+static int
+netdev_gre_pop_header(struct netdev *netdev_ OVS_UNUSED,
+ struct dpif_packet **pkt, int cnt)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ gre_extract_md(pkt[i]);
+ }
+ return 0;
+}
+
+static void
+netdev_gre_push_header__(struct ofpbuf *packet,
+ const void *header, int size)
+{
+ struct gre_base_hdr *greh;
+ int ip_tot_size;
+
+ greh = push_ip_header(packet, header, size, &ip_tot_size);
+
+ if (greh->flags & htons(GRE_CSUM)) {
+ ovs_16aligned_be32 *options = (ovs_16aligned_be32 *) (greh + 1);
+
+ put_16aligned_be32(options,
+ (OVS_FORCE ovs_be32) csum(greh, ip_tot_size - sizeof (struct ip_header)));
+ }
+}
+
+static int
+netdev_gre_push_header(const struct netdev *netdev OVS_UNUSED,
+ struct dpif_packet **packets, int cnt,
+ const struct ovs_action_push_tnl *data)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ netdev_gre_push_header__(&packets[i]->ofpbuf,
+ data->header, data->header_len);
+ packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
+ }
+ return 0;
+}
+
+
+static int
+netdev_gre_build_header(const struct netdev *netdev,
+ struct ovs_action_push_tnl *data)
+{
+ struct netdev_vport *dev = netdev_vport_cast(netdev);
+ struct netdev_tunnel_config *tnl_cfg;
+ struct ip_header *ip;
+ struct gre_base_hdr *greh;
+ ovs_16aligned_be32 *options;
+ int hlen;
+
+ /* XXX: RCUfy tnl_cfg. */
+ ovs_mutex_lock(&dev->mutex);
+ tnl_cfg = &dev->tnl_cfg;
+
+ ip = ip_hdr(data->header);
+ ip->ip_proto = IPPROTO_GRE;
+
+ greh = gre_hdr(ip);
+ greh->protocol = htons(ETH_TYPE_TEB);
+ greh->flags = 0;
+
+ options = (ovs_16aligned_be32 *) (greh + 1);
+ if (tnl_cfg->csum) {
+ greh->flags |= htons(GRE_CSUM);
+ put_16aligned_be32(options, 0);
+ options++;
+ }
+
+ if (tnl_cfg->out_key_present) {
+ greh->flags |= htons(GRE_KEY);
+ put_16aligned_be32(options, (OVS_FORCE ovs_be32)
+ ((OVS_FORCE uint64_t) tnl_cfg->out_key >> 32));
+ options++;
+ }
+
+ ovs_mutex_unlock(&dev->mutex);
+
+ hlen = (uint8_t *) options - (uint8_t *) greh;
+
+ data->header_len = sizeof(struct eth_header) +
+ sizeof(struct ip_header) + hlen;
+ data->tnl_type = OVS_VPORT_TYPE_GRE;
+ return 0;
+}
+
+static void
+vxlan_extract_md(struct dpif_packet *dpif_pkt)
+{
+ struct ofpbuf *packet = &dpif_pkt->ofpbuf;
+ struct pkt_metadata *md = &dpif_pkt->md;
+ struct flow_tnl *tnl = &md->tunnel;
+ struct udp_header *udp;
+ struct vxlanhdr *vxh;
+
+ memset(md, 0, sizeof *md);
+ if (VXLAN_HLEN > ofpbuf_size(packet)) {
+ return;
+ }
+
+ udp = ip_extract_tnl_md(packet, tnl);
+ if (!udp) {
+ return;
+ }
+ vxh = (struct vxlanhdr *) (udp + 1);
+
+ if (get_16aligned_be32(&vxh->vx_flags) != htonl(VXLAN_FLAGS) ||
+ (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
+ VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
+ ntohl(get_16aligned_be32(&vxh->vx_flags)),
+ ntohl(get_16aligned_be32(&vxh->vx_vni)));
+ reset_tnl_md(md);
+ return;
+ }
+ tnl->tp_src = udp->udp_src;
+ tnl->tp_dst = udp->udp_dst;
+ tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
+
+ ofpbuf_reset_packet(packet, VXLAN_HLEN);
+}
+
+static int
+netdev_vxlan_pop_header(struct netdev *netdev_ OVS_UNUSED,
+ struct dpif_packet **pkt, int cnt)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ vxlan_extract_md(pkt[i]);
+ }
+ return 0;
+}
+
+static int
+netdev_vxlan_build_header(const struct netdev *netdev,
+ struct ovs_action_push_tnl *data)
+{
+ struct netdev_vport *dev = netdev_vport_cast(netdev);
+ struct netdev_tunnel_config *tnl_cfg;
+ struct ip_header *ip;
+ struct udp_header *udp;
+ struct vxlanhdr *vxh;
+
+ /* XXX: RCUfy tnl_cfg. */
+ ovs_mutex_lock(&dev->mutex);
+ tnl_cfg = &dev->tnl_cfg;
+
+ ip = ip_hdr(data->header);
+ ip->ip_proto = IPPROTO_UDP;
+
+ udp = (struct udp_header *) (ip + 1);
+ udp->udp_dst = tnl_cfg->dst_port;
+
+ vxh = (struct vxlanhdr *) (udp + 1);
+ put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
+ put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(tnl_cfg->out_key) << 8));
+
+ ovs_mutex_unlock(&dev->mutex);
+ data->header_len = VXLAN_HLEN;
+ data->tnl_type = OVS_VPORT_TYPE_VXLAN;
+ return 0;
+}
+
+static ovs_be16
+get_src_port(struct dpif_packet *packet)
+{
+ uint32_t hash;
+
+ hash = dpif_packet_get_dp_hash(packet);
+
+ return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
+ tnl_udp_port_min);
+}
+
+static void
+netdev_vxlan_push_header__(struct dpif_packet *packet,
+ const void *header, int size)
+{
+ struct udp_header *udp;
+ int ip_tot_size;
+
+ udp = push_ip_header(&packet->ofpbuf, header, size, &ip_tot_size);
+
+ /* set udp src port */
+ udp->udp_src = get_src_port(packet);
+ udp->udp_len = htons(ip_tot_size - sizeof (struct ip_header));
+ /* udp_csum is zero */
+}
+
+static int
+netdev_vxlan_push_header(const struct netdev *netdev OVS_UNUSED,
+ struct dpif_packet **packets, int cnt,
+ const struct ovs_action_push_tnl *data)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ netdev_vxlan_push_header__(packets[i],
+ data->header, VXLAN_HLEN);
+ packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
+ }
+ return 0;
+}
+
+static void
+netdev_vport_range(struct unixctl_conn *conn, int argc,
+ const char *argv[], void *aux OVS_UNUSED)
+{
+ int val1, val2;
+
+ if (argc < 3) {
+ struct ds ds = DS_EMPTY_INITIALIZER;
+
+ ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
+ tnl_udp_port_min, tnl_udp_port_max);
+
+ unixctl_command_reply(conn, ds_cstr(&ds));
+ ds_destroy(&ds);
+ return;
+ }
+
+ if (argc != 3) {
+ return;
+ }
+
+ val1 = atoi(argv[1]);
+ if (val1 <= 0 || val1 > UINT16_MAX) {
+ unixctl_command_reply(conn, "Invalid min.");
+ return;
+ }
+ val2 = atoi(argv[2]);
+ if (val2 <= 0 || val2 > UINT16_MAX) {
+ unixctl_command_reply(conn, "Invalid max.");
+ return;
+ }
+
+ if (val1 > val2) {
+ tnl_udp_port_min = val2;
+ tnl_udp_port_max = val1;
+ } else {
+ tnl_udp_port_min = val1;
+ tnl_udp_port_max = val2;
+ }
+ seq_change(tnl_conf_seq);
+
+ unixctl_command_reply(conn, "OK");
+}
+
\f
#define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG, \
- GET_TUNNEL_CONFIG, GET_STATUS) \
+ GET_TUNNEL_CONFIG, GET_STATUS, \
+ BUILD_HEADER, \
+ PUSH_HEADER, POP_HEADER) \
NULL, \
netdev_vport_run, \
netdev_vport_wait, \
GET_CONFIG, \
SET_CONFIG, \
GET_TUNNEL_CONFIG, \
+ BUILD_HEADER, \
+ PUSH_HEADER, \
+ POP_HEADER, \
NULL, /* get_numa_id */ \
NULL, /* set_multiq */ \
\
NULL, /* rx_wait */ \
NULL, /* rx_drain */
-#define TUNNEL_CLASS(NAME, DPIF_PORT) \
- { DPIF_PORT, \
- { NAME, VPORT_FUNCTIONS(get_tunnel_config, \
- set_tunnel_config, \
- get_netdev_tunnel_config, \
- tunnel_get_status) }}
+
+#define TUNNEL_CLASS(NAME, DPIF_PORT, BUILD_HEADER, PUSH_HEADER, POP_HEADER) \
+ { DPIF_PORT, \
+ { NAME, VPORT_FUNCTIONS(get_tunnel_config, \
+ set_tunnel_config, \
+ get_netdev_tunnel_config, \
+ tunnel_get_status, \
+ BUILD_HEADER, PUSH_HEADER, POP_HEADER) }}
void
netdev_vport_tunnel_register(void)
/* The name of the dpif_port should be short enough to accomodate adding
* a port number to the end if one is necessary. */
static const struct vport_class vport_classes[] = {
- TUNNEL_CLASS("geneve", "genev_sys"),
- TUNNEL_CLASS("gre", "gre_sys"),
- TUNNEL_CLASS("ipsec_gre", "gre_sys"),
- TUNNEL_CLASS("gre64", "gre64_sys"),
- TUNNEL_CLASS("ipsec_gre64", "gre64_sys"),
- TUNNEL_CLASS("vxlan", "vxlan_sys"),
- TUNNEL_CLASS("lisp", "lisp_sys")
+ TUNNEL_CLASS("geneve", "genev_sys", NULL, NULL, NULL),
+ TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
+ netdev_gre_push_header,
+ netdev_gre_pop_header),
+ TUNNEL_CLASS("ipsec_gre", "gre_sys", NULL, NULL, NULL),
+ TUNNEL_CLASS("gre64", "gre64_sys", NULL, NULL, NULL),
+ TUNNEL_CLASS("ipsec_gre64", "gre64_sys", NULL, NULL, NULL),
+ TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
+ netdev_vxlan_push_header,
+ netdev_vxlan_pop_header),
+ TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL)
};
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
for (i = 0; i < ARRAY_SIZE(vport_classes); i++) {
netdev_register_provider(&vport_classes[i].netdev_class);
}
+
+ unixctl_command_register("tnl/egress_port_range", "min max", 0, 2,
+ netdev_vport_range, NULL);
+
ovsthread_once_done(&once);
}
}
{ "patch", VPORT_FUNCTIONS(get_patch_config,
set_patch_config,
NULL,
- NULL) }};
+ NULL, NULL, NULL, NULL) }};
netdev_register_provider(&patch_class.netdev_class);
}
#include "openflow/openflow.h"
#include "packets.h"
#include "poll-loop.h"
+#include "seq.h"
#include "shash.h"
#include "smap.h"
#include "sset.h"
static struct hmap netdev_classes OVS_GUARDED_BY(netdev_class_mutex)
= HMAP_INITIALIZER(&netdev_classes);
+/* Incremented whenever tnl route, arp, etc changes. */
+struct seq *tnl_conf_seq;
+
struct netdev_registered_class {
/* In 'netdev_classes', by class->type. */
struct hmap_node hmap_node OVS_GUARDED_BY(netdev_class_mutex);
#endif
netdev_dpdk_register();
+ tnl_conf_seq = seq_create();
ovsthread_once_done(&once);
}
}
return error;
}
+int
+netdev_pop_header(struct netdev *netdev, struct dpif_packet **buffers, int cnt)
+{
+ return (netdev->netdev_class->pop_header
+ ? netdev->netdev_class->pop_header(netdev, buffers, cnt)
+ : EOPNOTSUPP);
+}
+
+int
+netdev_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data)
+{
+ if (netdev->netdev_class->build_header) {
+ return netdev->netdev_class->build_header(netdev, data);
+ }
+ return EOPNOTSUPP;
+}
+
+int
+netdev_push_header(const struct netdev *netdev,
+ struct dpif_packet **buffers, int cnt,
+ const struct ovs_action_push_tnl *data)
+{
+ if (netdev->netdev_class->push_header) {
+ return netdev->netdev_class->push_header(netdev, buffers, cnt, data);
+ } else {
+ return -EINVAL;
+ }
+}
+
/* Registers with the poll loop to wake up from the next call to poll_block()
* when the packet transmission queue has sufficient room to transmit a packet
* with netdev_send().
struct in6_addr;
struct smap;
struct sset;
+struct ovs_action_push_tnl;
/* Network device statistics.
*
bool may_steal);
void netdev_send_wait(struct netdev *, int qid);
+int netdev_build_header(const struct netdev *, struct ovs_action_push_tnl *data);
+int netdev_push_header(const struct netdev *netdev,
+ struct dpif_packet **buffers, int cnt,
+ const struct ovs_action_push_tnl *data);
+int netdev_pop_header(struct netdev *netdev, struct dpif_packet **buffers,
+ int cnt);
+
/* Hardware address. */
int netdev_set_etheraddr(struct netdev *, const uint8_t mac[ETH_ADDR_LEN]);
int netdev_get_etheraddr(const struct netdev *, uint8_t mac[ETH_ADDR_LEN]);
netdev_dump_queue_stats_cb *, void *aux);
enum { NETDEV_MAX_RX_BATCH = 256 }; /* Maximum number packets in rx_recv() batch. */
+extern struct seq *tnl_conf_seq;
#ifdef __cplusplus
}
switch ((enum ovs_action_attr) type) {
/* These only make sense in the context of a datapath. */
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_TUNNEL_PUSH:
+ case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
if (dp_execute_action) {
#include <netinet/icmp6.h>
#include <stdlib.h>
#include <string.h>
+
#include "byte-order.h"
#include "coverage.h"
#include "dpif.h"
#include "packets.h"
#include "simap.h"
#include "timeval.h"
+#include "unaligned.h"
#include "util.h"
#include "vlog.h"
switch ((enum ovs_action_attr) type) {
case OVS_ACTION_ATTR_OUTPUT: return sizeof(uint32_t);
+ case OVS_ACTION_ATTR_TUNNEL_PUSH: return -2;
+ case OVS_ACTION_ATTR_TUNNEL_POP: return sizeof(uint32_t);
case OVS_ACTION_ATTR_USERSPACE: return -2;
case OVS_ACTION_ATTR_PUSH_VLAN: return sizeof(struct ovs_action_push_vlan);
case OVS_ACTION_ATTR_POP_VLAN: return 0;
ds_put_format(ds, ")");
}
+/* Appends a human-readable dump of the prebuilt tunnel header in 'data' to
+ * 'ds': Ethernet, IPv4, then UDP+VXLAN or GRE according to 'tnl_type'.
+ * Multi-byte header fields are converted to host byte order so the output
+ * reads identically on all platforms and round-trips through
+ * ovs_parse_tnl_push(). */
+static void
+format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
+{
+    const struct eth_header *eth;
+    const struct ip_header *ip;
+    const void *l3;
+
+    eth = (const struct eth_header *)data->header;
+
+    l3 = eth + 1;
+    ip = (const struct ip_header *)l3;
+
+    /* Ethernet.  'header_len' and 'tnl_type' are uint32_t, so use PRIu32
+     * (PRIu8 mismatched the promoted argument type). */
+    ds_put_format(ds, "header(size=%"PRIu32",type=%"PRIu32",eth(dst=",
+                  data->header_len, data->tnl_type);
+    ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(eth->eth_dst));
+    ds_put_format(ds, ",src=");
+    ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(eth->eth_src));
+    ds_put_format(ds, ",dl_type=0x%04"PRIx16"),", ntohs(eth->eth_type));
+
+    /* IPv4.  'ip_frag_off' is network order; convert before printing. */
+    ds_put_format(ds, "ipv4(src="IP_FMT",dst="IP_FMT",proto=%"PRIu8
+                  ",tos=%#"PRIx8",ttl=%"PRIu8",frag=0x%"PRIx16"),",
+                  IP_ARGS(get_16aligned_be32(&ip->ip_src)),
+                  IP_ARGS(get_16aligned_be32(&ip->ip_dst)),
+                  ip->ip_proto, ip->ip_tos,
+                  ip->ip_ttl,
+                  ntohs(ip->ip_frag_off));
+
+    if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
+        const struct vxlanhdr *vxh;
+        const struct udp_header *udp;
+
+        /* UDP */
+        udp = (const struct udp_header *) (ip + 1);
+        ds_put_format(ds, "udp(src=%"PRIu16",dst=%"PRIu16"),",
+                      ntohs(udp->udp_src), ntohs(udp->udp_dst));
+
+        /* VxLan */
+        vxh = (const struct vxlanhdr *) (udp + 1);
+        ds_put_format(ds, "vxlan(flags=0x%"PRIx32",vni=0x%"PRIx32")",
+                      ntohl(get_16aligned_be32(&vxh->vx_flags)),
+                      ntohl(get_16aligned_be32(&vxh->vx_vni)));
+    } else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
+        const struct gre_base_hdr *greh;
+        const ovs_16aligned_be32 *options;
+        const void *l4;
+
+        l4 = ((const uint8_t *)l3 + sizeof(struct ip_header));
+        greh = (const struct gre_base_hdr *) l4;
+
+        /* The double '(' matches the syntax ovs_parse_tnl_push() expects.
+         * 'flags' is network order; convert before printing (previously it
+         * was printed raw and byte-swapped on little-endian hosts). */
+        ds_put_format(ds, "gre((flags=0x%"PRIx16",proto=0x%"PRIx16")",
+                      ntohs(greh->flags), ntohs(greh->protocol));
+        options = (const ovs_16aligned_be32 *)(greh + 1);
+        if (greh->flags & htons(GRE_CSUM)) {
+            ds_put_format(ds, ",csum=0x%"PRIx32,
+                          ntohl(get_16aligned_be32(options)));
+            options++;
+        }
+        if (greh->flags & htons(GRE_KEY)) {
+            ds_put_format(ds, ",key=0x%"PRIx32,
+                          ntohl(get_16aligned_be32(options)));
+            options++;
+        }
+        if (greh->flags & htons(GRE_SEQ)) {
+            ds_put_format(ds, ",seq=0x%"PRIx32,
+                          ntohl(get_16aligned_be32(options)));
+            options++;
+        }
+        ds_put_format(ds, ")");
+    }
+    ds_put_format(ds, ")");
+}
+
+/* Formats an OVS_ACTION_ATTR_TUNNEL_PUSH attribute 'attr' into 'ds' as
+ * "tnl_push(tnl_port(N),header(...),out_port(M))".  The cast discards
+ * const only because format_odp_tnl_push_header() takes a non-const
+ * pointer; the data is not modified. */
+static void
+format_odp_tnl_push_action(struct ds *ds, const struct nlattr *attr)
+{
+    struct ovs_action_push_tnl *data;
+
+    data = (struct ovs_action_push_tnl *) nl_attr_get(attr);
+
+    ds_put_format(ds, "tnl_push(tnl_port(%"PRIu32"),", data->tnl_port);
+    format_odp_tnl_push_header(ds, data);
+    ds_put_format(ds, ",out_port(%"PRIu32"))", data->out_port);
+}
+
static void
format_odp_action(struct ds *ds, const struct nlattr *a)
{
case OVS_ACTION_ATTR_OUTPUT:
ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a));
break;
+ case OVS_ACTION_ATTR_TUNNEL_POP:
+ ds_put_format(ds, "tnl_pop(%"PRIu32")", nl_attr_get_u32(a));
+ break;
+ case OVS_ACTION_ATTR_TUNNEL_PUSH:
+ format_odp_tnl_push_action(ds, a);
+ break;
case OVS_ACTION_ATTR_USERSPACE:
format_odp_userspace_action(ds, a);
break;
return -EINVAL;
}
+/* Parses the textual form of an OVS_ACTION_ATTR_TUNNEL_PUSH action from 's'
+ * into 'data' -- the inverse of format_odp_tnl_push_action().  Returns the
+ * number of bytes of 's' consumed on success, otherwise -EINVAL.
+ *
+ * The outer header must be Ethernet/IPv4 followed by either UDP+VXLAN or
+ * GRE, and the "size="/"type=" fields must agree with the parsed header. */
+static int
+ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
+{
+    struct eth_header *eth;
+    struct ip_header *ip;
+    struct udp_header *udp;
+    struct gre_base_hdr *greh;
+    uint16_t gre_proto, gre_flags, ip_frag, dl_type, udp_src, udp_dst;
+    ovs_be32 sip, dip;
+    uint32_t tnl_type = 0, header_len = 0;
+    void *l3, *l4;
+    int n = 0;
+
+    if (!ovs_scan_len(s, &n, "tnl_push(tnl_port(%"SCNi32"),", &data->tnl_port)) {
+        return -EINVAL;
+    }
+    eth = (struct eth_header *) data->header;
+    l3 = (data->header + sizeof *eth);
+    l4 = ((uint8_t *) l3 + sizeof (struct ip_header));
+    ip = (struct ip_header *) l3;
+    if (!ovs_scan_len(s, &n, "header(size=%"SCNi32",type=%"SCNi32","
+                      "eth(dst="ETH_ADDR_SCAN_FMT",",
+                      &data->header_len,
+                      &data->tnl_type,
+                      ETH_ADDR_SCAN_ARGS(eth->eth_dst))) {
+        return -EINVAL;
+    }
+
+    if (!ovs_scan_len(s, &n, "src="ETH_ADDR_SCAN_FMT",",
+                      ETH_ADDR_SCAN_ARGS(eth->eth_src))) {
+        return -EINVAL;
+    }
+    if (!ovs_scan_len(s, &n, "dl_type=0x%"SCNx16"),", &dl_type)) {
+        return -EINVAL;
+    }
+    eth->eth_type = htons(dl_type);
+
+    /* IPv4.  Scan "frag=" into a host-order temporary so the network-order
+     * 'ip_frag_off' field is set consistently on all platforms (scanning
+     * straight into the be16 field stored a host-order value). */
+    if (!ovs_scan_len(s, &n, "ipv4(src="IP_SCAN_FMT",dst="IP_SCAN_FMT",proto=%"SCNi8
+                      ",tos=%"SCNi8",ttl=%"SCNi8",frag=0x%"SCNx16"),",
+                      IP_SCAN_ARGS(&sip),
+                      IP_SCAN_ARGS(&dip),
+                      &ip->ip_proto, &ip->ip_tos,
+                      &ip->ip_ttl, &ip_frag)) {
+        return -EINVAL;
+    }
+    put_16aligned_be32(&ip->ip_src, sip);
+    put_16aligned_be32(&ip->ip_dst, dip);
+    ip->ip_frag_off = htons(ip_frag);
+
+    /* Tunnel header. */
+    udp = (struct udp_header *) l4;
+    greh = (struct gre_base_hdr *) l4;
+    if (ovs_scan_len(s, &n, "udp(src=%"SCNi16",dst=%"SCNi16"),",
+                     &udp_src, &udp_dst)) {
+        struct vxlanhdr *vxh;
+        uint32_t vx_flags, vx_vni;
+
+        udp->udp_src = htons(udp_src);
+        udp->udp_dst = htons(udp_dst);
+        udp->udp_len = 0;       /* Filled in when the header is pushed. */
+        udp->udp_csum = 0;
+
+        vxh = (struct vxlanhdr *) (udp + 1);
+        if (!ovs_scan_len(s, &n, "vxlan(flags=0x%"SCNx32",vni=0x%"SCNx32"))",
+                          &vx_flags, &vx_vni)) {
+            return -EINVAL;
+        }
+        put_16aligned_be32(&vxh->vx_flags, htonl(vx_flags));
+        put_16aligned_be32(&vxh->vx_vni, htonl(vx_vni));
+        tnl_type = OVS_VPORT_TYPE_VXLAN;
+        header_len = sizeof *eth + sizeof *ip +
+                     sizeof *udp + sizeof *vxh;
+    } else if (ovs_scan_len(s, &n, "gre((flags=0x%"SCNx16",proto=0x%"SCNx16")",
+                            &gre_flags, &gre_proto)) {
+        ovs_16aligned_be32 *options;
+
+        /* GRE flags arrive in host order in the text; store them in network
+         * order and test the host-order value directly (the old code stored
+         * the raw scanned value and compared it with htons() constants,
+         * which only worked by accident on little-endian hosts). */
+        tnl_type = OVS_VPORT_TYPE_GRE;
+        greh->flags = htons(gre_flags);
+        greh->protocol = htons(gre_proto);
+        options = (ovs_16aligned_be32 *) (greh + 1);
+
+        if (gre_flags & GRE_CSUM) {
+            uint32_t csum;
+
+            if (!ovs_scan_len(s, &n, ",csum=0x%"SCNx32, &csum)) {
+                return -EINVAL;
+            }
+            put_16aligned_be32(options, htonl(csum));
+            options++;
+        }
+        if (gre_flags & GRE_KEY) {
+            uint32_t key;
+
+            if (!ovs_scan_len(s, &n, ",key=0x%"SCNx32, &key)) {
+                return -EINVAL;
+            }
+
+            put_16aligned_be32(options, htonl(key));
+            options++;
+        }
+        if (gre_flags & GRE_SEQ) {
+            uint32_t seq;
+
+            if (!ovs_scan_len(s, &n, ",seq=0x%"SCNx32, &seq)) {
+                return -EINVAL;
+            }
+            put_16aligned_be32(options, htonl(seq));
+            options++;
+        }
+
+        if (!ovs_scan_len(s, &n, "))")) {
+            return -EINVAL;
+        }
+
+        header_len = sizeof *eth + sizeof *ip +
+                     ((uint8_t *) options - (uint8_t *) greh);
+    } else {
+        return -EINVAL;
+    }
+
+    /* The explicit size/type fields must match the parsed header. */
+    if (data->tnl_type != tnl_type) {
+        return -EINVAL;
+    }
+    if (data->header_len != header_len) {
+        return -EINVAL;
+    }
+
+    /* Out port */
+    if (!ovs_scan_len(s, &n, ",out_port(%"SCNi32"))", &data->out_port)) {
+        return -EINVAL;
+    }
+
+    return n;
+}
+
static int
parse_odp_action(const char *s, const struct simap *port_names,
struct ofpbuf *actions)
}
}
+ {
+ uint32_t port;
+ int n;
+
+ if (ovs_scan(s, "tnl_pop(%"SCNi32")%n", &port, &n)) {
+ nl_msg_put_u32(actions, OVS_ACTION_ATTR_TUNNEL_POP, port);
+ return n;
+ }
+ }
+
+ {
+ struct ovs_action_push_tnl data;
+ int n;
+
+ n = ovs_parse_tnl_push(s, &data);
+ if (n > 0) {
+ odp_put_tnl_push_action(actions, &data);
+ return n;
+ } else if (n < 0) {
+ return n;
+ }
+ }
return -EINVAL;
}
tun_key_to_attr(odp_actions, tunnel);
nl_msg_end_nested(odp_actions, offset);
}
+
+/* Appends an OVS_ACTION_ATTR_TUNNEL_PUSH action carrying 'data' to
+ * 'odp_actions'.  Only the used portion of the variable-length 'header'
+ * buffer is copied into the attribute payload. */
+void
+odp_put_tnl_push_action(struct ofpbuf *odp_actions,
+                        struct ovs_action_push_tnl *data)
+{
+    int payload_len = offsetof(struct ovs_action_push_tnl, header)
+                      + data->header_len;
+
+    nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_TUNNEL_PUSH, data,
+                      payload_len);
+}
+
\f
/* The commit_odp_actions() function and its helpers. */
void odp_put_tunnel_action(const struct flow_tnl *tunnel,
struct ofpbuf *odp_actions);
+void odp_put_tnl_push_action(struct ofpbuf *odp_actions,
+ struct ovs_action_push_tnl *data);
#endif /* odp-util.h */
}
#endif
+/* Advances 'b' past the first 'off' bytes of its frame (e.g. an outer
+ * tunnel header that has just been stripped) and resets the per-packet
+ * metadata: the frame pointer and the L2.5/L3/L4 offsets no longer apply
+ * to the inner packet.
+ *
+ * NOTE(review): assumes b->frame points at the start of the current packet
+ * and that off <= ofpbuf_size(b) -- confirm with callers. */
+static inline void ofpbuf_reset_packet(struct ofpbuf *b, int off)
+{
+    ofpbuf_set_size(b, ofpbuf_size(b) - off);
+    ofpbuf_set_data(b, (void *) ((unsigned char *) b->frame + off));
+    b->frame = NULL;
+    b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX;
+}
+
#ifdef __cplusplus
}
#endif
#include "dynamic-string.h"
#include "netdev.h"
#include "packets.h"
+#include "seq.h"
#include "ovs-router.h"
#include "ovs-router-linux.h"
#include "unixctl.h"
/* An old rule with the same match was displaced. */
ovsrcu_postpone(rt_entry_free, ovs_router_entry_cast(cr));
}
+ seq_change(tnl_conf_seq);
}
void
if (rt_entry_delete(plen + 32, ip, plen)) {
unixctl_command_reply(conn, "OK");
+ seq_change(tnl_conf_seq);
} else {
unixctl_command_reply(conn, "Not found");
}
classifier_remove(&cls, &rt->cr);
}
}
+ seq_change(tnl_conf_seq);
}
/* May not be called more than once. */
ovs_router_unixctl_register(void)
{
classifier_init(&cls, NULL);
- /* XXX: Add documentation for these commands. */
- unixctl_command_register("ovs/route/add", "ip mask dev gw", 2, 3,
+ unixctl_command_register("ovs/route/add", "ipv4_addr/prefix_len out_br_name gw", 2, 3,
ovs_router_add, NULL);
unixctl_command_register("ovs/route/show", "", 0, 0, ovs_router_show, NULL);
- unixctl_command_register("ovs/route/del", "ip mask", 1, 1, ovs_router_del,
+ unixctl_command_register("ovs/route/del", "ipv4_addr/prefix_len", 1, 1, ovs_router_del,
NULL);
}
ds_put_cstr(s, "[800]");
}
}
+
+/* Buffer size for an ARP request: 2 bytes of padding (so L3 is 4-byte
+ * aligned), room for a VLAN tag, Ethernet header, and ARP payload. */
+#define ARP_PACKET_SIZE  (2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + \
+                          ARP_ETH_HEADER_LEN)
+
+/* Replaces the content of 'b' with a broadcast Ethernet ARP request asking
+ * for 'ip_dst', sent from hardware address 'eth_src' and protocol address
+ * 'ip_src'.  Headroom is reserved so a VLAN tag can later be pushed
+ * without reallocating. */
+void
+compose_arp(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN],
+            ovs_be32 ip_src, ovs_be32 ip_dst)
+{
+    struct eth_header *eth;
+    struct arp_eth_header *arp;
+
+    ofpbuf_clear(b);
+    ofpbuf_prealloc_tailroom(b, ARP_PACKET_SIZE);
+    ofpbuf_reserve(b, 2 + VLAN_HEADER_LEN);
+
+    /* Ethernet: broadcast destination. */
+    eth = ofpbuf_put_uninit(b, sizeof *eth);
+    memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
+    memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN);
+    eth->eth_type = htons(ETH_TYPE_ARP);
+
+    /* ARP request for an IPv4/Ethernet binding; target MAC left zeroed. */
+    arp = ofpbuf_put_uninit(b, sizeof *arp);
+    arp->ar_hrd = htons(ARP_HRD_ETHERNET);
+    arp->ar_pro = htons(ARP_PRO_IP);
+    arp->ar_hln = sizeof arp->ar_sha;
+    arp->ar_pln = sizeof arp->ar_spa;
+    arp->ar_op = htons(ARP_OP_REQUEST);
+    memcpy(arp->ar_sha, eth_src, ETH_ADDR_LEN);
+    memset(arp->ar_tha, 0, ETH_ADDR_LEN);
+
+    put_16aligned_be32(&arp->ar_spa, ip_src);
+    put_16aligned_be32(&arp->ar_tpa, ip_dst);
+
+    ofpbuf_set_frame(b, eth);
+    ofpbuf_set_l3(b, arp);
+}
#define ETH_TYPE_IP 0x0800
#define ETH_TYPE_ARP 0x0806
+#define ETH_TYPE_TEB 0x6558
#define ETH_TYPE_VLAN_8021Q 0x8100
#define ETH_TYPE_VLAN ETH_TYPE_VLAN_8021Q
#define ETH_TYPE_VLAN_8021AD 0x88a8
ovs_16aligned_be32 ip_src;
ovs_16aligned_be32 ip_dst;
};
+
BUILD_ASSERT_DECL(IP_HEADER_LEN == sizeof(struct ip_header));
#define ICMP_HEADER_LEN 8
|| dl_type == htons(ETH_TYPE_IPV6);
}
+/* Tunnel header */
#define GENEVE_CRIT_OPT_TYPE (1 << 7)
struct geneve_opt {
ovs_be16 opt_class;
uint8_t opt_data[];
};
+/* GRE base header (RFC 2784/2890); follows the outer IPv4 header.
+ * Optional checksum/key/sequence words follow when the matching flag bits
+ * are set. */
+struct gre_base_hdr {
+    ovs_be16 flags;
+    ovs_be16 protocol;
+};
+
+/* GRE flag bits, in host byte order; compare against the network-order
+ * 'flags' field only after htons()/ntohs() conversion. */
+#define GRE_CSUM 0x8000
+#define GRE_ROUTING 0x4000
+#define GRE_KEY 0x2000
+#define GRE_SEQ 0x1000
+#define GRE_STRICT 0x0800
+#define GRE_REC 0x0700
+#define GRE_FLAGS 0x00F8
+#define GRE_VERSION 0x0007
+
+/* VXLAN protocol header (RFC 7348); follows the outer UDP header. */
+struct vxlanhdr {
+    ovs_16aligned_be32 vx_flags;
+    ovs_16aligned_be32 vx_vni;  /* VNI in the upper 24 bits. */
+};
+
+#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
+
+
void format_ipv6_addr(char *addr_str, const struct in6_addr *addr);
void print_ipv6_addr(struct ds *string, const struct in6_addr *addr);
void print_ipv6_masked(struct ds *string, const struct in6_addr *addr,
void packet_format_tcp_flags(struct ds *, uint16_t);
const char *packet_tcp_flag_to_string(uint32_t flag);
+void compose_arp(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN],
+ ovs_be32 ip_src, ovs_be32 ip_dst);
#endif /* packets.h */
--- /dev/null
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <inttypes.h>
+#include <stdlib.h>
+
+#include "bitmap.h"
+#include "cmap.h"
+#include "coverage.h"
+#include "dpif-netdev.h"
+#include "dynamic-string.h"
+#include "errno.h"
+#include "flow.h"
+#include "netdev.h"
+#include "ovs-thread.h"
+#include "packets.h"
+#include "packet-dpif.h"
+#include "poll-loop.h"
+#include "seq.h"
+#include "timeval.h"
+#include "tnl-arp-cache.h"
+#include "unaligned.h"
+#include "unixctl.h"
+#include "util.h"
+#include "vlog.h"
+
+
+/* In seconds */
+#define ARP_ENTRY_DEFAULT_IDLE_TIME  (15 * 60)
+
+/* One learned binding: the MAC address for 'ip' as seen on bridge
+ * 'br_name'.  Entries idle out after ARP_ENTRY_DEFAULT_IDLE_TIME. */
+struct tnl_arp_entry {
+    struct cmap_node cmap_node; /* In 'table', hashed on 'ip'. */
+    ovs_be32 ip;
+    uint8_t mac[ETH_ADDR_LEN];
+    time_t expires;             /* Expiration time. */
+    char br_name[IFNAMSIZ];
+};
+
+/* All entries.  cmap readers are RCU-safe; writers serialize on 'mutex'. */
+static struct cmap table;
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+
+/* Finds the cache entry for 'dst' learned on bridge 'br_name', refreshing
+ * its idle timer as a side effect.  Returns NULL if there is none.  The
+ * hash is the raw IPv4 address.
+ *
+ * NOTE(review): called both with and without 'mutex' held; the cmap read
+ * is RCU-safe, but the write to 'expires' here is unsynchronized --
+ * presumably a tolerated benign race; confirm intent. */
+static struct tnl_arp_entry *
+tnl_arp_lookup__(const char br_name[IFNAMSIZ], ovs_be32 dst)
+{
+    struct tnl_arp_entry *arp;
+
+    CMAP_FOR_EACH_WITH_HASH (arp, cmap_node, (OVS_FORCE uint32_t) dst, &table) {
+        if (arp->ip == dst && !strcmp(arp->br_name, br_name)) {
+            arp->expires = time_now() + ARP_ENTRY_DEFAULT_IDLE_TIME;
+            return arp;
+        }
+    }
+    return NULL;
+}
+
+/* Looks up the MAC address for 'dst' on bridge 'br_name'.  On success
+ * copies it into 'mac' and returns 0; otherwise returns ENOENT and leaves
+ * 'mac' untouched. */
+int
+tnl_arp_lookup(const char br_name[IFNAMSIZ], ovs_be32 dst,
+               uint8_t mac[ETH_ADDR_LEN])
+{
+    struct tnl_arp_entry *entry = tnl_arp_lookup__(br_name, dst);
+
+    if (!entry) {
+        return ENOENT;
+    }
+
+    memcpy(mac, entry->mac, ETH_ADDR_LEN);
+    return 0;
+}
+
+/* Frees 'arp'; used as the RCU callback scheduled by tnl_arp_delete(). */
+static void
+arp_entry_free(struct tnl_arp_entry *arp)
+{
+    free(arp);
+}
+
+/* Removes 'arp' from the cache and defers freeing it until after the
+ * current RCU grace period, since concurrent readers may still hold it. */
+static void
+tnl_arp_delete(struct tnl_arp_entry *arp)
+{
+    cmap_remove(&table, &arp->cmap_node, (OVS_FORCE uint32_t) arp->ip);
+    ovsrcu_postpone(arp_entry_free, arp);
+}
+
+/* Learns a (bridge 'name', IPv4) -> MAC binding from ARP packet 'flow',
+ * and narrows 'wc' so the resulting datapath flow exactly matches this
+ * binding.  Returns 0 if the binding was learned or refreshed, EINVAL if
+ * 'flow' is not an ARP packet. */
+int
+tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
+              const char name[IFNAMSIZ])
+{
+    struct tnl_arp_entry *arp;
+
+    if (flow->dl_type != htons(ETH_TYPE_ARP)) {
+        return EINVAL;
+    }
+
+    /* Exact Match on all ARP flows. */
+    memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
+    memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
+    memset(&wc->masks.arp_sha, 0xff, sizeof wc->masks.arp_sha);
+
+    ovs_mutex_lock(&mutex);
+    arp = tnl_arp_lookup__(name, flow->nw_src);
+    if (arp) {
+        if (!memcmp(arp->mac, flow->arp_sha, ETH_ADDR_LEN)) {
+            /* Same binding: just refresh the idle timer. */
+            arp->expires = time_now() + ARP_ENTRY_DEFAULT_IDLE_TIME;
+            ovs_mutex_unlock(&mutex);
+            return 0;
+        }
+        /* MAC changed: drop the stale entry and re-learn below. */
+        tnl_arp_delete(arp);
+        seq_change(tnl_conf_seq);
+    }
+
+    arp = xmalloc(sizeof *arp);
+
+    arp->ip = flow->nw_src;
+    memcpy(arp->mac, flow->arp_sha, ETH_ADDR_LEN);
+    arp->expires = time_now() + ARP_ENTRY_DEFAULT_IDLE_TIME;
+    strncpy(arp->br_name, name, IFNAMSIZ);
+    arp->br_name[IFNAMSIZ - 1] = '\0'; /* strncpy() may not NUL-terminate,
+                                        * and lookup strcmp()s this field. */
+    cmap_insert(&table, &arp->cmap_node, (OVS_FORCE uint32_t) arp->ip);
+    ovs_mutex_unlock(&mutex);
+    return 0;
+}
+
+/* Periodic sweep: expires cache entries idle longer than
+ * ARP_ENTRY_DEFAULT_IDLE_TIME.  Bumps 'tnl_conf_seq' if anything was
+ * removed so dependent datapath flows get revalidated. */
+void
+tnl_arp_cache_run(void)
+{
+    struct tnl_arp_entry *arp;
+    time_t now = time_now();    /* Hoisted: one timestamp for the sweep. */
+    bool changed = false;
+
+    ovs_mutex_lock(&mutex);
+    CMAP_FOR_EACH(arp, cmap_node, &table) {
+        if (arp->expires <= now) {
+            tnl_arp_delete(arp);
+            changed = true;
+        }
+    }
+    ovs_mutex_unlock(&mutex);
+
+    if (changed) {
+        seq_change(tnl_conf_seq);
+    }
+}
+
+/* unixctl "tnl/arp/flush": drops every entry in the cache and, if any
+ * were present, bumps 'tnl_conf_seq' to trigger revalidation. */
+static void
+tnl_arp_cache_flush(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
+                    const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+    struct tnl_arp_entry *entry;
+    bool flushed_any = false;
+
+    ovs_mutex_lock(&mutex);
+    CMAP_FOR_EACH(entry, cmap_node, &table) {
+        tnl_arp_delete(entry);
+        flushed_any = true;
+    }
+    ovs_mutex_unlock(&mutex);
+
+    if (flushed_any) {
+        seq_change(tnl_conf_seq);
+    }
+    unixctl_command_reply(conn, "OK");
+}
+
+#define MAX_IP_ADDR_LEN 17      /* Column width for the IP address field. */
+
+/* unixctl "tnl/arp/show": dumps the cache as an aligned IP/MAC/bridge
+ * table. */
+static void
+tnl_arp_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                   const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    struct tnl_arp_entry *arp;
+
+    ds_put_cstr(&ds, "IP               MAC                 Bridge\n");
+    ds_put_cstr(&ds, "=============================================\n");
+    ovs_mutex_lock(&mutex);
+    CMAP_FOR_EACH(arp, cmap_node, &table) {
+        int start_len, need_ws;
+
+        start_len = ds.length;
+        ds_put_format(&ds, IP_FMT, IP_ARGS(arp->ip));
+
+        /* Pad the IP column out to MAX_IP_ADDR_LEN characters. */
+        need_ws = MAX_IP_ADDR_LEN - (ds.length - start_len);
+        ds_put_char_multiple(&ds, ' ', need_ws);
+
+        ds_put_format(&ds, ETH_ADDR_FMT"   %s\n",
+                      ETH_ADDR_ARGS(arp->mac), arp->br_name);
+
+    }
+    ovs_mutex_unlock(&mutex);
+    unixctl_command_reply(conn, ds_cstr(&ds));
+    ds_destroy(&ds);
+}
+
+/* One-time initialization of the tunnel ARP cache and its unixctl
+ * commands. */
+void
+tnl_arp_cache_init(void)
+{
+    cmap_init(&table);
+
+    unixctl_command_register("tnl/arp/show", "", 0, 0, tnl_arp_cache_show, NULL);
+    unixctl_command_register("tnl/arp/flush", "", 0, 0, tnl_arp_cache_flush, NULL);
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNL_ARP_CACHE_H
+#define TNL_ARP_CACHE_H 1
+
+#include <errno.h>
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <net/if.h>
+#include <sys/socket.h>
+
+#include "flow.h"
+#include "netdev.h"
+#include "packets.h"
+#include "util.h"
+
+/* ARP cache for native (userspace) tunneling: maps (bridge, IPv4) to the
+ * MAC address of the next hop, learned by snooping ARP traffic. */
+
+/* Learns a binding from ARP packet 'flow' seen on bridge 'dev_name';
+ * exact-matches the relevant fields in 'wc'.  Returns 0 or EINVAL. */
+int tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
+                  const char dev_name[]);
+/* Stores the MAC for 'dst' on 'dev_name' in 'mac'; 0 or ENOENT. */
+int tnl_arp_lookup(const char dev_name[], ovs_be32 dst, uint8_t mac[ETH_ADDR_LEN]);
+void tnl_arp_cache_init(void);
+void tnl_arp_cache_run(void);   /* Periodic expiration sweep. */
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "classifier.h"
+#include "dynamic-string.h"
+#include "hash.h"
+#include "ofpbuf.h"
+#include "ovs-thread.h"
+#include "odp-util.h"
+#include "tnl-arp-cache.h"
+#include "tnl-ports.h"
+#include "unixctl.h"
+#include "util.h"
+
+static struct classifier cls;   /* Tunnel ports. */
+
+/* One listening tunnel endpoint.  Refcounted because multiple OVS tunnel
+ * ports can share the same wire identity (remote IP, UDP port). */
+struct tnl_port_in {
+    struct cls_rule cr;         /* Must be first; see tnl_port_cast(). */
+    odp_port_t portno;
+    struct ovs_refcount ref_cnt;
+    char dev_name[IFNAMSIZ];
+};
+
+/* Converts classifier rule 'cr' back to its containing tnl_port_in.
+ * Because 'cr' is the first member (asserted below), a NULL 'cr' maps to a
+ * NULL result, which callers rely on. */
+static struct tnl_port_in *
+tnl_port_cast(const struct cls_rule *cr)
+{
+    BUILD_ASSERT_DECL(offsetof(struct tnl_port_in, cr) == 0);
+
+    return CONTAINER_OF(cr, struct tnl_port_in, cr);
+}
+
+/* Destroys 'p' and its classifier rule; used as an RCU callback from
+ * tnl_port_unref(). */
+static void
+tnl_port_free(struct tnl_port_in *p)
+{
+    cls_rule_destroy(&p->cr);
+    free(p);
+}
+
+/* Initializes '*flow' to match packets arriving from the remote tunnel
+ * endpoint 'ip_dst': IPv4 with UDP destination 'udp_port' for VXLAN-style
+ * tunnels, or GRE when 'udp_port' is 0. */
+static void
+tnl_port_init_flow(struct flow *flow, ovs_be32 ip_dst, ovs_be16 udp_port)
+{
+    memset(flow, 0, sizeof *flow);
+    flow->dl_type = htons(ETH_TYPE_IP);
+    if (udp_port) {
+        flow->nw_proto = IPPROTO_UDP;
+    } else {
+        flow->nw_proto = IPPROTO_GRE;
+    }
+    flow->tp_dst = udp_port;
+    /* When matching on an incoming flow from the remote tunnel endpoint,
+     * our dst ip address is the source ip for them. */
+    flow->nw_src = ip_dst;
+}
+
+/* Registers a listening entry mapping the tunnel endpoint ('ip_dst',
+ * 'udp_port'; 0 for GRE) on device 'dev_name' to datapath port 'port', or
+ * takes an additional reference if an identical entry already exists. */
+void
+tnl_port_map_insert(odp_port_t port, ovs_be32 ip_dst, ovs_be16 udp_port,
+                    const char dev_name[])
+{
+    const struct cls_rule *cr;
+    struct tnl_port_in *p;
+    struct match match;
+
+    memset(&match, 0, sizeof match);
+    tnl_port_init_flow(&match.flow, ip_dst, udp_port);
+
+    do {
+        cr = classifier_lookup(&cls, &match.flow, NULL);
+        p = tnl_port_cast(cr);
+        /* Try again if the rule was released before we get the reference. */
+    } while (p && !ovs_refcount_try_ref_rcu(&p->ref_cnt));
+
+    if (p) {
+        return; /* Added refcount of an existing port. */
+    }
+
+    p = xzalloc(sizeof *p);
+    p->portno = port;
+
+    match.wc.masks.dl_type = OVS_BE16_MAX;
+    match.wc.masks.nw_proto = 0xff;
+    match.wc.masks.nw_frag = 0xff;      /* XXX: No fragments support. */
+    match.wc.masks.tp_dst = OVS_BE16_MAX;
+    match.wc.masks.nw_src = OVS_BE32_MAX;
+
+    cls_rule_init(&p->cr, &match, 0); /* Priority == 0. */
+    ovs_refcount_init(&p->ref_cnt);
+    strncpy(p->dev_name, dev_name, IFNAMSIZ);
+    p->dev_name[IFNAMSIZ - 1] = '\0';   /* strncpy() may not NUL-terminate,
+                                         * and tnl_port_show() prints this. */
+
+    classifier_insert(&cls, &p->cr);
+}
+
+/* Drops one reference to the port behind 'cr' (a no-op for NULL 'cr').
+ * On the last reference, removes the rule and frees the port after the
+ * RCU grace period.  (The NULL check now precedes the CONTAINER_OF cast
+ * instead of following it.) */
+static void
+tnl_port_unref(const struct cls_rule *cr)
+{
+    struct tnl_port_in *p;
+
+    if (!cr) {
+        return;
+    }
+
+    p = tnl_port_cast(cr);
+    if (ovs_refcount_unref_relaxed(&p->ref_cnt) == 1) {
+        if (classifier_remove(&cls, cr)) {
+            ovsrcu_postpone(tnl_port_free, p);
+        }
+    }
+}
+
+/* Releases one reference to the listening entry for the tunnel endpoint
+ * ('ip_dst', 'udp_port'; 0 for GRE), removing it once unreferenced.
+ * Does nothing if no such entry exists. */
+void
+tnl_port_map_delete(ovs_be32 ip_dst, ovs_be16 udp_port)
+{
+    struct flow match_flow;
+
+    tnl_port_init_flow(&match_flow, ip_dst, udp_port);
+    tnl_port_unref(classifier_lookup(&cls, &match_flow, NULL));
+}
+
+/* Returns the datapath port listening for 'flow' (narrowing 'wc' with the
+ * fields consulted), or ODPP_NONE if no tunnel endpoint matches. */
+odp_port_t
+tnl_port_map_lookup(const struct flow *flow, struct flow_wildcards *wc)
+{
+    const struct cls_rule *cr = classifier_lookup(&cls, flow, wc);
+
+    if (!cr) {
+        return ODPP_NONE;
+    }
+    return tnl_port_cast(cr)->portno;
+}
+
+/* unixctl "tnl/ports/show": dumps each listening tunnel port as
+ * "<dev> (<odp port>) : <odp flow key/mask>". */
+static void
+tnl_port_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+              const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    const struct tnl_port_in *p;
+
+    ds_put_format(&ds, "Listening ports:\n");
+    CLS_FOR_EACH(p, cr, &cls) {
+        struct odputil_keybuf keybuf;
+        struct odputil_keybuf maskbuf;
+        struct flow flow;
+        const struct nlattr *key, *mask;
+        size_t key_len, mask_len;
+        struct flow_wildcards wc;
+        struct ofpbuf buf;
+
+        ds_put_format(&ds, "%s (%"PRIu32") : ", p->dev_name, p->portno);
+        /* Expand the rule's compressed match back to full flow/mask form. */
+        minimask_expand(&p->cr.match.mask, &wc);
+        miniflow_expand(&p->cr.match.flow, &flow);
+
+        /* Key. */
+        ofpbuf_use_stack(&buf, &keybuf, sizeof keybuf);
+        odp_flow_key_from_flow(&buf, &flow, &wc.masks,
+                               flow.in_port.odp_port, true);
+        key = ofpbuf_data(&buf);
+        key_len = ofpbuf_size(&buf);
+        /* Mask. */
+        ofpbuf_use_stack(&buf, &maskbuf, sizeof maskbuf);
+        odp_flow_key_from_mask(&buf, &wc.masks, &flow,
+                               odp_to_u32(wc.masks.in_port.odp_port),
+                               SIZE_MAX, false);
+        mask = ofpbuf_data(&buf);
+        mask_len = ofpbuf_size(&buf);
+
+        /* Render key/mask as the familiar odp flow syntax. */
+        odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, false);
+        ds_put_format(&ds, "\n");
+    }
+    unixctl_command_reply(conn, ds_cstr(&ds));
+    ds_destroy(&ds);
+}
+
+/* One-time initialization of the tunnel port map and its unixctl
+ * command. */
+void
+tnl_port_map_init(void)
+{
+    classifier_init(&cls, flow_segment_u32s);
+    unixctl_command_register("tnl/ports/show", "", 0, 0, tnl_port_show, NULL);
+}
--- /dev/null
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNL_PORT_H
+#define TNL_PORT_H 1
+
+#include <net/if.h>
+#include <sys/socket.h>
+
+#include "flow.h"
+#include "packets.h"
+#include "util.h"
+
+/* Map of tunnel endpoints the userspace datapath listens on, used to
+ * recognize incoming tunneled traffic and pop it to the right port. */
+
+/* Returns the datapath port listening for 'flow', or ODPP_NONE. */
+odp_port_t tnl_port_map_lookup(const struct flow *flow,
+                               struct flow_wildcards *wc);
+
+/* Adds (or references) a listening entry; 'udp_port' of 0 means GRE. */
+void tnl_port_map_insert(odp_port_t port, ovs_be32 ip_dst, ovs_be16 udp_port,
+                         const char dev_name[]);
+
+/* Releases one reference to the matching entry, if any. */
+void tnl_port_map_delete(ovs_be32 ip_dst, ovs_be16 udp_port);
+
+void tnl_port_map_init(void);
+
+#endif
lib/vlog-unixctl.man \
lib/vlog.man \
ofproto/ofproto-dpif-unixctl.man \
+ ofproto/ofproto-tnl-unixctl.man \
ofproto/ofproto-unixctl.man \
ovsdb/remote-active.man \
ovsdb/remote-passive.man
lib/vlog-unixctl.man:
lib/vlog.man:
ofproto/ofproto-dpif-unixctl.man:
+ofproto/ofproto-tnl-unixctl.man:
ofproto/ofproto-unixctl.man:
ovsdb/remote-active.man:
ovsdb/remote-passive.man:
CLEANFILES += ofproto/ipfix-entities.def
-MAN_FRAGMENTS += ofproto/ofproto-unixctl.man ofproto/ofproto-dpif-unixctl.man
+MAN_FRAGMENTS += ofproto/ofproto-unixctl.man ofproto/ofproto-dpif-unixctl.man \
+ ofproto/ofproto-tnl-unixctl.man
# IPFIX entity definition macros generation from IANA's XML definition.
EXTRA_DIST += ofproto/ipfix.xml
#include "ofproto/ofproto-dpif-xlate.h"
#include <errno.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include "tnl-arp-cache.h"
#include "bfd.h"
#include "bitmap.h"
#include "bond.h"
#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-provider.h"
#include "packet-dpif.h"
+#include "ovs-router.h"
+#include "tnl-ports.h"
#include "tunnel.h"
#include "vlog.h"
XC_NORMAL,
XC_FIN_TIMEOUT,
XC_GROUP,
+ XC_TNL_ARP,
};
/* xlate_cache entries hold enough information to perform the side effects of
struct group_dpif *group;
struct ofputil_bucket *bucket;
} group;
+ struct {
+ char br_name[IFNAMSIZ];
+ ovs_be32 d_ip;
+ } tnl_arp_cache;
} u;
};
}
}
+/* Resolves the route for the tunnel destination in 'oflow'.  On success
+ * stores the IPv4 next hop in '*ip' (the gateway if there is one, else the
+ * tunnel destination itself), points '*out_port' at the xport backed by
+ * the egress device, and returns 0.  Returns -ENOENT when there is no
+ * route or no bridge port for the egress device.
+ *
+ * NOTE(review): the negative errno differs from the positive-errno style
+ * used by the netdev/tnl helpers; callers only test for nonzero, so it is
+ * harmless today -- consider normalizing. */
+static int
+tnl_route_lookup_flow(const struct flow *oflow,
+                      ovs_be32 *ip, struct xport **out_port)
+{
+    char out_dev[IFNAMSIZ];
+    struct xbridge *xbridge;
+    struct xlate_cfg *xcfg;
+    ovs_be32 gw;
+
+    if (!ovs_router_lookup(oflow->tunnel.ip_dst, out_dev, &gw)) {
+        return -ENOENT;
+    }
+
+    if (gw) {
+        *ip = gw;
+    } else {
+        *ip = oflow->tunnel.ip_dst;
+    }
+
+    xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
+    ovs_assert(xcfg);
+
+    /* Find the bridge named after the egress device, then the bridge port
+     * whose netdev is that device. */
+    HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
+        if (!strncmp(xbridge->name, out_dev, IFNAMSIZ)) {
+            struct xport *port;
+
+            HMAP_FOR_EACH (port, ofp_node, &xbridge->xports) {
+                if (!strncmp(netdev_get_name(port->netdev), out_dev, IFNAMSIZ)) {
+                    *out_port = port;
+                    return 0;
+                }
+            }
+        }
+    }
+    return -ENOENT;
+}
+
+/* Sends 'packet' out every port of 'xbridge' via an OFPP_FLOOD output
+ * action (used to broadcast ARP requests on the underlay bridge).
+ * Returns the result of executing the action set. */
+static int
+xlate_flood_packet(struct xbridge *xbridge, struct ofpbuf *packet)
+{
+    struct ofpact_output output;
+    struct flow flow;
+
+    ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
+    /* Use OFPP_NONE as the in_port to avoid special packet processing. */
+    flow_extract(packet, NULL, &flow);
+    flow.in_port.ofp_port = OFPP_NONE;
+    output.port = OFPP_FLOOD;
+    output.max_len = 0;
+
+    return ofproto_dpif_execute_actions(xbridge->ofproto, &flow, NULL,
+                                        &output.ofpact, sizeof output,
+                                        packet);
+}
+
+/* Broadcasts an ARP request for 'ip_dst' (from 'eth_src'/'ip_src') on the
+ * bridge that owns underlay port 'out_dev', so the tunnel ARP cache can
+ * learn the next hop's MAC from the reply.  Fire-and-forget: the flood
+ * result is ignored. */
+static void
+tnl_send_arp_request(const struct xport *out_dev, const uint8_t eth_src[ETH_ADDR_LEN],
+                     ovs_be32 ip_src, ovs_be32 ip_dst)
+{
+    struct xbridge *xbridge = out_dev->xbridge;
+    struct ofpbuf packet;
+
+    ofpbuf_init(&packet, 0);
+    compose_arp(&packet, eth_src, ip_src, ip_dst);
+
+    xlate_flood_packet(xbridge, &packet);
+    ofpbuf_uninit(&packet);
+}
+
+/* Composes a tunnel-push datapath action for 'flow' leaving through tunnel
+ * port 'xport' (datapath port 'tunnel_odp_port').
+ *
+ * Resolves the route to the remote endpoint, the underlay port's MAC and
+ * IPv4 address, and the remote MAC via the tunnel ARP cache.  On success
+ * appends OVS_ACTION_ATTR_TUNNEL_PUSH to the translation's odp_actions and
+ * returns 0.  If the remote MAC is unknown, broadcasts an ARP request on
+ * the underlay bridge and returns the nonzero lookup error so the caller
+ * drops the packet for now. */
+static int
+build_tunnel_send(const struct xlate_ctx *ctx, const struct xport *xport,
+                  const struct flow *flow, odp_port_t tunnel_odp_port)
+{
+    struct ovs_action_push_tnl tnl_push_data;
+    struct xport *out_dev = NULL;
+    ovs_be32 s_ip, d_ip = 0;
+    uint8_t smac[ETH_ADDR_LEN];
+    uint8_t dmac[ETH_ADDR_LEN];
+    int err;
+
+    err = tnl_route_lookup_flow(flow, &d_ip, &out_dev);
+    if (err) {
+        return err;
+    }
+
+    /* Use mac addr of bridge port of the peer. */
+    err = netdev_get_etheraddr(out_dev->netdev, smac);
+    if (err) {
+        return err;
+    }
+
+    err = netdev_get_in4(out_dev->netdev, (struct in_addr *) &s_ip, NULL);
+    if (err) {
+        return err;
+    }
+
+    err = tnl_arp_lookup(out_dev->xbridge->name, d_ip, dmac);
+    if (err) {
+        /* Remote MAC unknown: solicit it and fail this translation. */
+        tnl_send_arp_request(out_dev, smac, s_ip, d_ip);
+        return err;
+    }
+    if (ctx->xin->xcache) {
+        struct xc_entry *entry;
+
+        /* Remember the ARP entry used so revalidation keeps it warm. */
+        entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TNL_ARP);
+        strncpy(entry->u.tnl_arp_cache.br_name, out_dev->xbridge->name,
+                IFNAMSIZ);
+        entry->u.tnl_arp_cache.br_name[IFNAMSIZ - 1] = '\0'; /* strncpy() may
+                                                              * not terminate. */
+        entry->u.tnl_arp_cache.d_ip = d_ip;
+    }
+    err = tnl_port_build_header(xport->ofport, flow,
+                                dmac, smac, s_ip, &tnl_push_data);
+    if (err) {
+        return err;
+    }
+    tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
+    tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
+    odp_put_tnl_push_action(ctx->xout->odp_actions, &tnl_push_data);
+    return 0;
+}
+
static void
compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
bool check_stp)
const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
struct flow_wildcards *wc = &ctx->xout->wc;
struct flow *flow = &ctx->xin->flow;
+ struct flow_tnl flow_tnl;
ovs_be16 flow_vlan_tci;
uint32_t flow_pkt_mark;
uint8_t flow_nw_tos;
odp_port_t out_port, odp_port;
+ bool tnl_push_pop_send = false;
uint8_t dscp;
/* If 'struct flow' gets additional metadata, we'll need to zero it out
* before traversing a patch port. */
BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28);
+ memset(&flow_tnl, 0, sizeof flow_tnl);
if (!xport) {
xlate_report(ctx, "Nonexistent output port");
* the Logical (tunnel) Port are not visible for any further
* matches, while explicit set actions on tunnel metadata are.
*/
- struct flow_tnl flow_tnl = flow->tunnel;
+ flow_tnl = flow->tunnel;
odp_port = tnl_port_send(xport->ofport, flow, &ctx->xout->wc);
if (odp_port == ODPP_NONE) {
xlate_report(ctx, "Tunneling decided against output");
entry->u.dev.tx = netdev_ref(xport->netdev);
}
out_port = odp_port;
- commit_odp_tunnel_action(flow, &ctx->base_flow,
- ctx->xout->odp_actions);
- flow->tunnel = flow_tnl; /* Restore tunnel metadata */
+ if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
+ tnl_push_pop_send = true;
+ } else {
+ commit_odp_tunnel_action(flow, &ctx->base_flow,
+ ctx->xout->odp_actions);
+ flow->tunnel = flow_tnl; /* Restore tunnel metadata */
+ }
} else {
odp_port = xport->odp_port;
out_port = odp_port;
if (out_port != ODPP_NONE) {
ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow,
ctx->xout->odp_actions,
- &ctx->xout->wc,
+ wc,
ctx->xbridge->masked_set_action);
if (ctx->use_recirc) {
nl_msg_put_u32(ctx->xout->odp_actions, OVS_ACTION_ATTR_RECIRC,
xr->recirc_id);
} else {
- add_ipfix_output_action(ctx, out_port);
- nl_msg_put_odp_port(ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT,
- out_port);
+
+ if (tnl_push_pop_send) {
+ build_tunnel_send(ctx, xport, flow, odp_port);
+ flow->tunnel = flow_tnl; /* Restore tunnel metadata */
+ } else {
+ odp_port_t odp_tnl_port = ODPP_NONE;
+
+ /* XXX: Write better Filter for tunnel port. We can use inport
+ * int tunnel-port flow to avoid these checks completely. */
+ if (ofp_port == OFPP_LOCAL &&
+ ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
+
+ odp_tnl_port = tnl_port_map_lookup(flow, wc);
+ }
+
+ if (odp_tnl_port != ODPP_NONE) {
+ nl_msg_put_odp_port(ctx->xout->odp_actions,
+ OVS_ACTION_ATTR_TUNNEL_POP,
+ odp_tnl_port);
+ } else {
+ /* Tunnel push-pop action is not compatible with
+ * IPFIX action. */
+ add_ipfix_output_action(ctx, out_port);
+ nl_msg_put_odp_port(ctx->xout->odp_actions,
+ OVS_ACTION_ATTR_OUTPUT,
+ out_port);
+ }
+ }
}
ctx->sflow_odp_port = odp_port;
struct flow *flow = &ctx->xin->flow;
const struct ofpact *a;
+ if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
+ tnl_arp_snoop(flow, wc, ctx->xbridge->name);
+ }
/* dl_type already in the mask, not set below. */
OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
{
struct xc_entry *entry;
struct ofpbuf entries = xcache->entries;
+ uint8_t dmac[ETH_ADDR_LEN];
if (!stats->n_packets) {
return;
group_dpif_credit_stats(entry->u.group.group, entry->u.group.bucket,
stats);
break;
+ case XC_TNL_ARP:
+ /* Lookup arp to avoid arp timeout. */
+ tnl_arp_lookup(entry->u.tnl_arp_cache.br_name, entry->u.tnl_arp_cache.d_ip, dmac);
+ break;
default:
OVS_NOT_REACHED();
}
case XC_GROUP:
group_dpif_unref(entry->u.group.group);
break;
+ case XC_TNL_ARP:
+ break;
default:
OVS_NOT_REACHED();
}
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-upcall.h"
#include "ofproto-dpif-xlate.h"
-#include "ovs-router.h"
#include "poll-loop.h"
+#include "ovs-router.h"
#include "seq.h"
#include "simap.h"
#include "smap.h"
/* Version string of the datapath stored in OVSDB. */
char *dp_version_string;
+
+ /* True if the datapath supports tnl_push and pop actions. */
+ bool enable_tnl_push_pop;
+ struct atomic_count tnl_count;
};
/* All existing ofproto_backer instances, indexed by ofproto->up.type. */
/* All existing ofproto_dpif instances, indexed by ->up.name. */
static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);
-static void ofproto_dpif_unixctl_init(void);
+static bool ofproto_use_tnl_push_pop = true;
+static void ofproto_unixctl_init(void);
static inline struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
return 0;
}
- dpif_run(backer->dpif);
+
+ if (dpif_run(backer->dpif)) {
+ backer->need_revalidate = REV_RECONFIGURE;
+ }
+
udpif_run(backer->udpif);
/* If vswitchd started with other_config:flow_restore_wait set as "true",
iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
if (tnl_port_reconfigure(iter, iter->up.netdev,
- iter->odp_port)) {
+ iter->odp_port,
+ ovs_native_tunneling_is_on(ofproto), dp_port)) {
backer->need_revalidate = REV_RECONFIGURE;
}
}
backer->masked_set_action = check_masked_set_action(backer);
backer->rid_pool = recirc_id_pool_create();
+ backer->enable_tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif);
+ atomic_count_init(&backer->tnl_count, 0);
+
error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
if (error) {
VLOG_ERR("failed to listen on datapath of type %s: %s",
return error;
}
+/* Returns true if native (userspace) tunneling may be used for 'ofproto':
+ * the global "ofproto/tnl-push-pop" unixctl switch is on, the backer's
+ * datapath supports the tnl_push/tnl_pop actions, and at least one tunnel
+ * port is currently configured on the backer (tnl_count is bumped in
+ * port_add() and dropped in port_del()). */
+bool
+ovs_native_tunneling_is_on(struct ofproto_dpif *ofproto)
+{
+    return ofproto_use_tnl_push_pop && ofproto->backer->enable_tnl_push_pop &&
+           atomic_count_get(&ofproto->backer->tnl_count);
+}
+
/* Tests whether 'backer''s datapath supports recirculation. Only newer
* datapaths support OVS_KEY_ATTR_RECIRC_ID in keys. We need to disable some
* features on older datapaths that don't support this feature.
guarded_list_init(&ofproto->pins);
- ofproto_dpif_unixctl_init();
+ ofproto_unixctl_init();
ovs_router_unixctl_register();
hmap_init(&ofproto->vlandev_map);
}
}
}
-
return 0;
}
port->odp_port = dpif_port.port_no;
if (netdev_get_tunnel_config(netdev)) {
- tnl_port_add(port, port->up.netdev, port->odp_port);
+ atomic_count_inc(&ofproto->backer->tnl_count);
+ tnl_port_add(port, port->up.netdev, port->odp_port,
+ ovs_native_tunneling_is_on(ofproto), namebuf);
port->is_tunnel = true;
if (ofproto->ipfix) {
dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
}
+ if (port->is_tunnel) {
+ atomic_count_dec(&ofproto->backer->tnl_count);
+ }
+
if (port->is_tunnel && ofproto->ipfix) {
dpif_ipfix_del_tunnel_port(ofproto->ipfix, port->odp_port);
}
port_modified(struct ofport *port_)
{
struct ofport_dpif *port = ofport_dpif_cast(port_);
+ char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
+ struct netdev *netdev = port->up.netdev;
if (port->bundle && port->bundle->bond) {
- bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
+ bond_slave_set_netdev(port->bundle->bond, port, netdev);
}
if (port->cfm) {
- cfm_set_netdev(port->cfm, port->up.netdev);
+ cfm_set_netdev(port->cfm, netdev);
}
if (port->bfd) {
- bfd_set_netdev(port->bfd, port->up.netdev);
+ bfd_set_netdev(port->bfd, netdev);
}
ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm,
port->up.pp.hw_addr);
- if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev,
- port->odp_port)) {
- ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate =
- REV_RECONFIGURE;
+ netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
+
+ if (port->is_tunnel) {
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
+
+ if (tnl_port_reconfigure(port, netdev, port->odp_port,
+ ovs_native_tunneling_is_on(ofproto), namebuf)) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
}
ofport_update_peer(port);
execute.actions = ofpbuf_data(xout.odp_actions);
execute.actions_len = ofpbuf_size(xout.odp_actions);
+
execute.packet = packet;
execute.md = pkt_metadata_from_flow(flow);
execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
}
static void
-ofproto_dpif_unixctl_init(void)
+ofproto_revalidate_all_backers(void)
+{
+    /* Marks every datapath backer for full reconfiguration so that a change
+     * to the tunnel push-pop setting is re-applied to all existing ports on
+     * the next run. */
+    const struct shash_node **backers;
+    size_t i, n_backers;
+
+    backers = shash_sort(&all_dpif_backers);
+    /* Hoisted out of the loop; also avoids comparing a signed index against
+     * the size_t returned by shash_count(). */
+    n_backers = shash_count(&all_dpif_backers);
+    for (i = 0; i < n_backers; i++) {
+        struct dpif_backer *backer = backers[i]->data;
+
+        backer->need_revalidate = REV_RECONFIGURE;
+    }
+    free(backers);
+}
+
+/* unixctl handler for "ofproto/tnl-push-pop on|off".  Toggles use of the
+ * native tunnel push-pop datapath actions and revalidates all backers so
+ * the change takes effect immediately. */
+static void
+disable_tnl_push_pop(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                     const char *argv[], void *aux OVS_UNUSED)
+{
+    if (!strcasecmp(argv[1], "off")) {
+        ofproto_use_tnl_push_pop = false;
+        unixctl_command_reply(conn, "Tunnel push-pop off");
+        ofproto_revalidate_all_backers();
+    } else if (!strcasecmp(argv[1], "on")) {
+        ofproto_use_tnl_push_pop = true;
+        unixctl_command_reply(conn, "Tunnel push-pop on");
+        ofproto_revalidate_all_backers();
+    } else {
+        /* Every unixctl request needs a reply; without this the client
+         * would block forever on an unrecognized argument. */
+        unixctl_command_reply_error(conn, "argument must be \"on\" or \"off\"");
+    }
+}
+
+static void
+ofproto_unixctl_init(void)
{
static bool registered;
if (registered) {
NULL);
unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2,
ofproto_unixctl_dpif_dump_flows, NULL);
+
+ unixctl_command_register("ofproto/tnl-push-pop", "[on]|[off]", 1, 1,
+ disable_tnl_push_pop, NULL);
}
/* Returns true if 'table' is the table used for internal rules,
#undef RULE_CAST
+bool ovs_native_tunneling_is_on(struct ofproto_dpif *ofproto);
#endif /* ofproto-dpif.h */
--- /dev/null
+.SS "OPENVSWITCH TUNNELING COMMANDS"
+These commands query and modify OVS tunneling components. Refer to
+README-native-tunneling.md for more information.
+.
+.IP "\fBovs/route/add ipv4_address/plen output_bridge [GW]\fR"
+Adds the ipv4_address/plen route to the vswitchd routing table.
+output_bridge must be the name of an OVS bridge. This command is useful
+if the cached OVS routes do not look correct.
+.
+.IP "\fBovs/route/show\fR"
+Prints all routes in the OVS routing table. This includes routes cached
+from the system routing table and user-configured routes.
+.
+.IP "\fBovs/route/del ipv4_address/plen\fR"
+Delete ipv4_address/plen route from OVS routing table.
+.
+.IP "\fBtnl/arp/show\fR"
+OVS builds its ARP cache by snooping ARP messages. This command shows
+the ARP cache table.
+.
+.IP "\fBtnl/arp/flush\fR"
+Flush ARP table.
+.
+.IP "\fBtnl/egress_port_range [num1] [num2]\fR"
+Sets the range of UDP source ports used for UDP-based tunnels such as
+VXLAN. With zero arguments this command prints the range currently in
+use.
#include "byte-order.h"
#include "connectivity.h"
+#include "csum.h"
+#include "dpif.h"
#include "dynamic-string.h"
#include "fat-rwlock.h"
#include "hash.h"
#include "hmap.h"
#include "netdev.h"
#include "odp-util.h"
+#include "ofpbuf.h"
#include "packets.h"
+#include "route-table.h"
#include "seq.h"
#include "smap.h"
#include "socket-util.h"
+#include "tnl-arp-cache.h"
+#include "tnl-ports.h"
#include "tunnel.h"
#include "vlog.h"
+#include "unaligned.h"
+#include "ofproto-dpif.h"
VLOG_DEFINE_THIS_MODULE(tunnel);
static bool
tnl_port_add__(const struct ofport_dpif *ofport, const struct netdev *netdev,
- odp_port_t odp_port, bool warn)
+ odp_port_t odp_port, bool warn, bool native_tnl, const char name[])
OVS_REQ_WRLOCK(rwlock)
{
const struct netdev_tunnel_config *cfg;
}
hmap_insert(*map, &tnl_port->match_node, tnl_hash(&tnl_port->match));
tnl_port_mod_log(tnl_port, "adding");
+
+ if (native_tnl) {
+ tnl_port_map_insert(odp_port, tnl_port->match.ip_dst,
+ cfg->dst_port, name);
+ }
return true;
}
* tunnel. */
void
tnl_port_add(const struct ofport_dpif *ofport, const struct netdev *netdev,
- odp_port_t odp_port) OVS_EXCLUDED(rwlock)
+ odp_port_t odp_port, bool native_tnl, const char name[]) OVS_EXCLUDED(rwlock)
{
fat_rwlock_wrlock(&rwlock);
- tnl_port_add__(ofport, netdev, odp_port, true);
+ tnl_port_add__(ofport, netdev, odp_port, true, native_tnl, name);
fat_rwlock_unlock(&rwlock);
}
* tnl_port_add(). */
bool
tnl_port_reconfigure(const struct ofport_dpif *ofport,
- const struct netdev *netdev, odp_port_t odp_port)
+ const struct netdev *netdev, odp_port_t odp_port,
+ bool native_tnl, const char name[])
OVS_EXCLUDED(rwlock)
{
struct tnl_port *tnl_port;
fat_rwlock_wrlock(&rwlock);
tnl_port = tnl_find_ofport(ofport);
if (!tnl_port) {
- changed = tnl_port_add__(ofport, netdev, odp_port, false);
+ changed = tnl_port_add__(ofport, netdev, odp_port, false, native_tnl, name);
} else if (tnl_port->netdev != netdev
|| tnl_port->match.odp_port != odp_port
|| tnl_port->change_seq != seq_read(connectivity_seq_get())) {
VLOG_DBG("reconfiguring %s", tnl_port_get_name(tnl_port));
tnl_port_del__(ofport);
- tnl_port_add__(ofport, netdev, odp_port, true);
+ tnl_port_add__(ofport, netdev, odp_port, true, native_tnl, name);
changed = true;
}
fat_rwlock_unlock(&rwlock);
tnl_port = tnl_find_ofport(ofport);
if (tnl_port) {
+ const struct netdev_tunnel_config *cfg =
+ netdev_get_tunnel_config(tnl_port->netdev);
struct hmap **map;
+ tnl_port_map_delete(tnl_port->match.ip_dst, cfg->dst_port);
tnl_port_mod_log(tnl_port, "removing");
map = tnl_match_map(&tnl_port->match);
hmap_remove(*map, &tnl_port->match_node);
{
return netdev_get_name(tnl_port->netdev);
}
+
+/* Builds the outer Ethernet and IPv4 headers used to encapsulate packets
+ * sent on tunnel port 'ofport', placing the result in 'data->header'.
+ * 'dmac' and 'smac' are the outer destination and source Ethernet
+ * addresses; 'ip_src' is the outer source IPv4 address.  The outer
+ * destination address, TOS, TTL and DF flag are taken from
+ * 'tnl_flow->tunnel'.
+ *
+ * Returns the result of netdev_build_header(), which appends the
+ * tunnel-protocol-specific portion (e.g. GRE or VXLAN) of the header. */
+int
+tnl_port_build_header(const struct ofport_dpif *ofport,
+                      const struct flow *tnl_flow,
+                      uint8_t dmac[ETH_ADDR_LEN],
+                      uint8_t smac[ETH_ADDR_LEN],
+                      ovs_be32 ip_src, struct ovs_action_push_tnl *data)
+{
+    struct tnl_port *tnl_port;
+    struct eth_header *eth;
+    struct ip_header *ip;
+    void *l3;
+    int res;
+
+    fat_rwlock_rdlock(&rwlock);
+    tnl_port = tnl_find_ofport(ofport);
+    ovs_assert(tnl_port);
+
+    /* Build Ethernet and IP headers. */
+    memset(data->header, 0, sizeof data->header);
+
+    eth = (struct eth_header *)data->header;
+    memcpy(eth->eth_dst, dmac, ETH_ADDR_LEN);
+    memcpy(eth->eth_src, smac, ETH_ADDR_LEN);
+    eth->eth_type = htons(ETH_TYPE_IP);
+
+    l3 = (eth + 1);
+    ip = (struct ip_header *) l3;
+
+    /* 20-byte IPv4 header (IHL 5), no options. */
+    ip->ip_ihl_ver = IP_IHL_VER(5, 4);
+    ip->ip_tos = tnl_flow->tunnel.ip_tos;
+    ip->ip_ttl = tnl_flow->tunnel.ip_ttl;
+    ip->ip_frag_off = (tnl_flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
+                      htons(IP_DF) : 0;
+
+    put_16aligned_be32(&ip->ip_src, ip_src);
+    put_16aligned_be32(&ip->ip_dst, tnl_flow->tunnel.ip_dst);
+
+    /* NOTE(review): the IP checksum is computed only after
+     * netdev_build_header(), presumably because that call fills in the
+     * remaining IP fields (e.g. protocol, total length) — confirm against
+     * the netdev implementations. */
+    res = netdev_build_header(tnl_port->netdev, data);
+    ip->ip_csum = csum(ip, sizeof *ip);
+    fat_rwlock_unlock(&rwlock);
+
+    return res;
+}
* These functions emulate tunnel virtual ports based on the outer
* header information from the kernel. */
+struct ovs_action_push_tnl;
struct ofport_dpif;
struct netdev;
void ofproto_tunnel_init(void);
bool tnl_port_reconfigure(const struct ofport_dpif *, const struct netdev *,
- odp_port_t);
+ odp_port_t, bool native_tnl, const char name[]);
void tnl_port_add(const struct ofport_dpif *, const struct netdev *,
- odp_port_t odp_port);
+ odp_port_t odp_port, bool native_tnl, const char name[]);
void tnl_port_del(const struct ofport_dpif *);
const struct ofport_dpif *tnl_port_receive(const struct flow *);
return flow->tunnel.ip_dst != 0;
}
+int tnl_port_build_header(const struct ofport_dpif *ofport,
+ const struct flow *tnl_flow,
+ uint8_t dmac[ETH_ADDR_LEN],
+ uint8_t smac[ETH_ADDR_LEN],
+ ovs_be32 ip_src, struct ovs_action_push_tnl *data);
+
#endif /* tunnel.h */
/usr/share/openvswitch/vswitch.ovsschema
/usr/share/openvswitch/vtep.ovsschema
%doc COPYING DESIGN.md INSTALL.SSL.md NOTICE README.md WHY-OVS.md FAQ.md NEWS
-%doc INSTALL.DPDK.md rhel/README.RHEL
+%doc INSTALL.DPDK.md rhel/README.RHEL README-native-tunneling.md
/var/lib/openvswitch
/var/log/openvswitch
tests/jsonrpc.at \
tests/jsonrpc-py.at \
tests/tunnel.at \
+ tests/tunnel-push-pop.at \
tests/lockfile.at \
tests/reconnect.at \
tests/ovs-vswitchd.at \
sample(sample=9.7%,actions(1,2,3,push_vlan(vid=1,pcp=2)))
set(tunnel(tun_id=0xabcdef1234567890,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,flags(df,csum,key)))
set(tunnel(tun_id=0xabcdef1234567890,src=1.1.1.1,dst=2.2.2.2,tos=0,ttl=64,tp_src=0,tp_dst=0,flags(key)))
+tnl_pop(4)
+tnl_push(tnl_port(4),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x40),gre((flags=0x20,proto=0x6558),key=0x1e241)),out_port(1))
+tnl_push(tnl_port(4),header(size=46,type=3,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x40),gre((flags=0xa0,proto=0x6558),csum=0x0,key=0x1e241)),out_port(1))
+tnl_push(tnl_port(6),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=f8:bc:12:46:58:e0,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x40),udp(src=0,dst=4789),vxlan(flags=0x8000000,vni=0x1c700)),out_port(1))
])
AT_CHECK_UNQUOTED([ovstest test-odp parse-actions < actions.txt], [0],
[`cat actions.txt`
AT_CHECK([ovs-appctl -t ovs-vswitchd exit])
AT_CHECK([ovs-appctl -t ovsdb-server exit])])
+m4_define([OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP],
+ [AT_CHECK([ovs-appctl ofproto/tnl-push-pop off], [0], [dnl
+Tunnel push-pop off
+])])
+
+ [AT_CHECK([ovs-appctl ofproto/tnl-push-pop off])])
+
# ADD_OF_PORTS(BRIDGE, OF_PORT[, OF_PORT...])
#
# Creates a dummy interface with an OpenFlow port number of OF_PORT and
m4_include([tests/jsonrpc.at])
m4_include([tests/jsonrpc-py.at])
m4_include([tests/tunnel.at])
+m4_include([tests/tunnel-push-pop.at])
m4_include([tests/lockfile.at])
m4_include([tests/reconnect.at])
m4_include([tests/ovs-vswitchd.at])
--- /dev/null
+dnl Tests for the userspace (native) tunnel push/pop datapath actions:
+dnl ARP snooping, tnl_pop on receive, and tnl_push on transmit for both
+dnl VXLAN and GRE ports.
+AT_BANNER([tunnel_push_pop])
+
+AT_SETUP([tunnel_push_pop - action])
+
+dnl br0 owns the physical (dummy) port; int-br holds the tunnel ports.
+OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1\
+                    -- add-br int-br -- set bridge int-br datapath_type=dummy \
+                    -- add-port int-br t2 -- set Interface t2 type=vxlan \
+                       options:remote_ip=1.1.2.92 options:key=123 ofport_request=2\
+                    -- add-port int-br t1 -- set Interface t1 type=gre \
+                       options:remote_ip=1.1.2.92 options:key=456 ofport_request=3])
+
+AT_CHECK([ovs-appctl dpif/show], [0], [dnl
+dummy@ovs-dummy: hit:0 missed:0
+	br0:
+		br0 65534/100: (dummy)
+		p0 1/3: (dummy)
+	int-br:
+		int-br 65534/2: (dummy)
+		t1 3/1: (gre: key=456, remote_ip=1.1.2.92)
+		t2 2/4789: (vxlan: key=123, remote_ip=1.1.2.92)
+])
+
+dnl Route to the tunnel endpoint via br0 and give br0 a local address.
+AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK
+])
+AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK
+])
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+
+dnl Check ARP Snoop
+AT_CHECK([ovs-appctl netdev-dummy/receive br0 'recirc_id(0),in_port(100),eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=1.1.2.92,tip=1.1.2.88,op=1,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)'])
+
+AT_CHECK([ovs-appctl tnl/arp/show], [0], [dnl
+IP               MAC                 Bridge
+=============================================
+1.1.2.92         f8:bc:12:44:34:b6   br0
+])
+
+dnl Check VXLAN tunnel pop
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=17,tos=0,ttl=64,frag=no),udp(src=51283,dst=4789)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: tnl_pop(4789)
+])
+
+dnl Check GRE tunnel pop
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: tnl_pop(1)
+])
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=2])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x40),udp(src=0,dst=4789),vxlan(flags=0x8000000,vni=0x7b00)),out_port(100))
+])
+
+dnl Check GRE tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=3])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: tnl_push(tnl_port(1),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x40),gre((flags=0x20,proto=0x6558),key=0x1c8)),out_port(100))
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
AT_DATA([flows.txt], [dnl
actions=IN_PORT
])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
actions=output:1
])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
actions=output:1
])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
actions=set_tunnel:1,output:1,set_tunnel:2,output:2,set_tunnel:3,output:3,set_tunnel:5,output:4
])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
actions=IN_PORT,output:1,output:2,output:3
])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
tun_id=4,actions=output:5
])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
options:remote_ip=flow ofport_request=4 \
-- add-port br0 p5 -- set Interface p5 type=gre options:key=flow \
options:remote_ip=5.5.5.5 ofport_request=5])
+
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
ADD_OF_PORTS([br0], [90])
AT_DATA([flows.txt], [dnl
in_port=90 actions=resubmit:1,resubmit:2,resubmit:3,resubmit:4,resubmit:5
.so lib/vlog-unixctl.man
.so lib/memory-unixctl.man
.so lib/coverage-unixctl.man
+.so ofproto/ofproto-tnl-unixctl.man
.
.SH "OPENFLOW IMPLEMENTATION"
.