From 7fd91025664b6ecbdca51427a2905a4f35ee3a16 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 9 Oct 2013 17:28:05 -0700 Subject: [PATCH] dpif: Support working around actions that a datapath does not support. Until now, OVS has expected that the datapath supports all the actions required by any flow to be installed. There are at least two reasons why a datapath might not support a given action: - The datapath version is older than the userspace version, and the action was introduced after the version of the datapath in use. - The action is not considered important enough to implement as part of an ABI that must be maintained forever. This commit adds infrastructure to handle these cases. It doesn't actually add any uses; that will come in an upcoming commit. Signed-off-by: Ben Pfaff --- lib/dpif.c | 192 ++++++++++++++++++++++++++++------ lib/dpif.h | 13 ++- lib/odp-util.c | 10 +- lib/odp-util.h | 66 ++++++------ ofproto/ofproto-dpif-upcall.c | 1 + ofproto/ofproto-dpif-xlate.c | 23 ++-- ofproto/ofproto-dpif.c | 3 +- 7 files changed, 232 insertions(+), 76 deletions(-) diff --git a/lib/dpif.c b/lib/dpif.c index b66e3bc03..b284e13c1 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -28,6 +28,7 @@ #include "flow.h" #include "netdev.h" #include "netlink.h" +#include "odp-execute.h" #include "odp-util.h" #include "ofp-errors.h" #include "ofp-print.h" @@ -53,6 +54,7 @@ COVERAGE_DEFINE(dpif_flow_put); COVERAGE_DEFINE(dpif_flow_del); COVERAGE_DEFINE(dpif_execute); COVERAGE_DEFINE(dpif_purge); +COVERAGE_DEFINE(dpif_execute_with_help); static const struct dpif_class *base_dpif_classes[] = { #ifdef LINUX_DATAPATH @@ -1061,6 +1063,94 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump) return dump->error == EOF ? 0 : dump->error; } +struct dpif_execute_helper_aux { + struct dpif *dpif; + int error; +}; + +static void +dpif_execute_helper_execute__(void *aux_, struct ofpbuf *packet, + const struct flow *flow, + const struct nlattr *actions, size_t actions_len) +{ + struct dpif_execute_helper_aux *aux = aux_; + struct dpif_execute execute; + struct odputil_keybuf key_stub; + struct ofpbuf key; + int error; + + ofpbuf_use_stub(&key, &key_stub, sizeof key_stub); + odp_flow_key_from_flow(&key, flow, flow->in_port.odp_port); + + execute.key = key.data; + execute.key_len = key.size; + execute.actions = actions; + execute.actions_len = actions_len; + execute.packet = packet; + execute.needs_help = false; + + error = aux->dpif->dpif_class->execute(aux->dpif, &execute); + if (error) { + aux->error = error; + } +} + +static void +dpif_execute_helper_output_cb(void *aux, struct ofpbuf *packet, + const struct flow *flow, odp_port_t out_port) +{ + uint64_t actions_stub[DIV_ROUND_UP(NL_A_U32_SIZE, 8)]; + struct ofpbuf actions; + + ofpbuf_use_stack(&actions, actions_stub, sizeof actions_stub); + nl_msg_put_u32(&actions, OVS_ACTION_ATTR_OUTPUT, odp_to_u32(out_port)); + + dpif_execute_helper_execute__(aux, packet, flow, + actions.data, actions.size); +} + +static void +dpif_execute_helper_userspace_cb(void *aux, struct ofpbuf *packet, + const struct flow *flow, + const struct nlattr *action) +{ + dpif_execute_helper_execute__(aux, packet, flow, + action, NLA_ALIGN(action->nla_len)); +} + +/* Executes 'execute' by performing most of the actions in userspace and + * passing the fully constructed packets to 'dpif' for output and userspace + * actions. + * + * This helps with actions that a given 'dpif' doesn't implement directly. */ +static int +dpif_execute_with_help(struct dpif *dpif, const struct dpif_execute *execute) +{ + struct dpif_execute_helper_aux aux; + enum odp_key_fitness fit; + struct ofpbuf *packet; + struct flow flow; + + COVERAGE_INC(dpif_execute_with_help); + + fit = odp_flow_key_to_flow(execute->key, execute->key_len, &flow); + if (fit == ODP_FIT_ERROR) { + return EINVAL; + } + + aux.dpif = dpif; + aux.error = 0; + + packet = ofpbuf_clone_with_headroom(execute->packet, VLAN_HEADER_LEN); + odp_execute_actions(&aux, packet, &flow, + execute->actions, execute->actions_len, + dpif_execute_helper_output_cb, + dpif_execute_helper_userspace_cb); + ofpbuf_delete(packet); + + return aux.error; +} + static int dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) { @@ -1068,7 +1158,9 @@ dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) COVERAGE_INC(dpif_execute); if (execute->actions_len > 0) { - error = dpif->dpif_class->execute(dpif, execute); + error = (execute->needs_help + ? dpif_execute_with_help(dpif, execute) + : dpif->dpif_class->execute(dpif, execute)); } else { error = 0; } @@ -1084,12 +1176,20 @@ dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute) * it contains some metadata that cannot be recovered from 'packet', such as * tunnel and in_port.) * + * Some dpif providers do not implement every action. The Linux kernel + * datapath, in particular, does not implement ARP field modification. If + * 'needs_help' is true, the dpif layer executes in userspace all of the + * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and + * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif + * implementation. + * * Returns 0 if successful, otherwise a positive errno value. */ int dpif_execute(struct dpif *dpif, const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, - const struct ofpbuf *buf) + const struct ofpbuf *buf, + bool needs_help) { struct dpif_execute execute; @@ -1098,6 +1198,7 @@ dpif_execute(struct dpif *dpif, execute.actions = actions; execute.actions_len = actions_len; execute.packet = buf; + execute.needs_help = needs_help; return dpif_execute__(dpif, &execute); } @@ -1110,54 +1211,83 @@ dpif_execute(struct dpif *dpif, void dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) { - size_t i; - if (dpif->dpif_class->operate) { - dpif->dpif_class->operate(dpif, ops, n_ops); + while (n_ops > 0) { + size_t chunk; + + /* Count 'chunk', the number of ops that can be executed without + * needing any help. Ops that need help should be rare, so we + * expect this to ordinarily be 'n_ops', that is, all the ops. */ + for (chunk = 0; chunk < n_ops; chunk++) { + struct dpif_op *op = ops[chunk]; + + if (op->type == DPIF_OP_EXECUTE && op->u.execute.needs_help) { + break; + } + } + + if (chunk) { + /* Execute a chunk full of ops that the dpif provider can + * handle itself, without help. */ + size_t i; + + dpif->dpif_class->operate(dpif, ops, chunk); + + for (i = 0; i < chunk; i++) { + struct dpif_op *op = ops[i]; + + switch (op->type) { + case DPIF_OP_FLOW_PUT: + log_flow_put_message(dpif, &op->u.flow_put, op->error); + break; + + case DPIF_OP_FLOW_DEL: + log_flow_del_message(dpif, &op->u.flow_del, op->error); + break; + + case DPIF_OP_EXECUTE: + log_execute_message(dpif, &op->u.execute, op->error); + break; + } + } + + ops += chunk; + n_ops -= chunk; + } else { + /* Help the dpif provider to execute one op. */ + struct dpif_op *op = ops[0]; + + op->error = dpif_execute__(dpif, &op->u.execute); + ops++; + n_ops--; + } + } + } else { + size_t i; for (i = 0; i < n_ops; i++) { struct dpif_op *op = ops[i]; switch (op->type) { case DPIF_OP_FLOW_PUT: - log_flow_put_message(dpif, &op->u.flow_put, op->error); + op->error = dpif_flow_put__(dpif, &op->u.flow_put); break; case DPIF_OP_FLOW_DEL: - log_flow_del_message(dpif, &op->u.flow_del, op->error); + op->error = dpif_flow_del__(dpif, &op->u.flow_del); break; case DPIF_OP_EXECUTE: - log_execute_message(dpif, &op->u.execute, op->error); + op->error = dpif_execute__(dpif, &op->u.execute); break; - } - } - return; - } - - for (i = 0; i < n_ops; i++) { - struct dpif_op *op = ops[i]; - - switch (op->type) { - case DPIF_OP_FLOW_PUT: - op->error = dpif_flow_put__(dpif, &op->u.flow_put); - break; - - case DPIF_OP_FLOW_DEL: - op->error = dpif_flow_del__(dpif, &op->u.flow_del); - break; - case DPIF_OP_EXECUTE: - op->error = dpif_execute__(dpif, &op->u.execute); - break; - - default: - NOT_REACHED(); + default: + NOT_REACHED(); + } } } } - /* Returns a string that represents 'type', for use in log messages. */ const char * dpif_upcall_type_to_string(enum dpif_upcall_type type) diff --git a/lib/dpif.h b/lib/dpif.h index 7a258c70b..8996c0a6a 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -493,7 +493,8 @@ int dpif_flow_dump_done(struct dpif_flow_dump *); int dpif_execute(struct dpif *, const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, - const struct ofpbuf *); + const struct ofpbuf *, + bool needs_help); /* Operation batching interface. * @@ -531,11 +532,21 @@ struct dpif_flow_del { }; struct dpif_execute { + /* Raw support for execute passed along to the provider. */ const struct nlattr *key; /* Partial flow key (only for metadata). */ size_t key_len; /* Length of 'key' in bytes. */ const struct nlattr *actions; /* Actions to execute on packet. */ size_t actions_len; /* Length of 'actions' in bytes. */ const struct ofpbuf *packet; /* Packet to execute. */ + + /* Some dpif providers do not implement every action. The Linux kernel + * datapath, in particular, does not implement ARP field modification. + * + * If this member is set to true, the dpif layer executes in userspace all + * of the actions that it can, and for OVS_ACTION_ATTR_OUTPUT and + * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the + * dpif implementation. */ + bool needs_help; }; struct dpif_op { diff --git a/lib/odp-util.c b/lib/odp-util.c index 36deaf7d0..8f938a75c 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -3583,13 +3583,17 @@ commit_set_pkt_mark_action(const struct flow *flow, struct flow *base, odp_put_pkt_mark_action(base->pkt_mark, odp_actions); } + /* If any of the flow key data that ODP actions can modify are different in * 'base' and 'flow', appends ODP actions to 'odp_actions' that change the flow * key from 'base' into 'flow', and then changes 'base' the same way. Does not * commit set_tunnel actions. Users should call commit_odp_tunnel_action() * in addition to this function if needed. Sets fields in 'wc' that are - * used as part of the action. */ -void + * used as part of the action. + * + * Returns a reason to force processing the flow's packets into the userspace + * slow path, if there is one, otherwise 0. */ +enum slow_path_reason commit_odp_actions(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc, int *mpls_depth_delta) @@ -3605,4 +3609,6 @@ commit_odp_actions(const struct flow *flow, struct flow *base, commit_mpls_action(flow, base, odp_actions, wc, mpls_depth_delta); commit_set_priority_action(flow, base, odp_actions, wc); commit_set_pkt_mark_action(flow, base, odp_actions, wc); + + return 0; } diff --git a/lib/odp-util.h b/lib/odp-util.h index 5cfcbef6d..821b2c412 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -35,6 +35,36 @@ struct nlattr; struct ofpbuf; struct simap; +#define SLOW_PATH_REASONS \ + /* These reasons are mutually exclusive. */ \ + SPR(SLOW_CFM, "cfm", "Consists of CFM packets") \ + SPR(SLOW_BFD, "bfd", "Consists of BFD packets") \ + SPR(SLOW_LACP, "lacp", "Consists of LACP packets") \ + SPR(SLOW_STP, "stp", "Consists of STP packets") \ + SPR(SLOW_CONTROLLER, "controller", \ + "Sends \"packet-in\" messages to the OpenFlow controller") \ + SPR(SLOW_ACTION, "action", \ + "Uses action(s) not supported by datapath") + +/* Indexes for slow-path reasons. Client code uses "enum slow_path_reason" + * values instead of these, these are just a way to construct those. */ +enum { +#define SPR(ENUM, STRING, EXPLANATION) ENUM##_INDEX, + SLOW_PATH_REASONS +#undef SPR +}; + +/* Reasons why a subfacet might not be fast-pathable. + * + * Each reason is a separate bit to allow reasons to be combined. */ +enum slow_path_reason { +#define SPR(ENUM, STRING, EXPLANATION) ENUM = 1 << ENUM##_INDEX, + SLOW_PATH_REASONS +#undef SPR +}; + +const char *slow_path_reason_to_explanation(enum slow_path_reason); + #define ODPP_LOCAL ODP_PORT_C(OVSP_LOCAL) #define ODPP_NONE ODP_PORT_C(UINT32_MAX) @@ -141,9 +171,11 @@ const char *odp_key_fitness_to_string(enum odp_key_fitness); void commit_odp_tunnel_action(const struct flow *, struct flow *base, struct ofpbuf *odp_actions); -void commit_odp_actions(const struct flow *, struct flow *base, - struct ofpbuf *odp_actions, struct flow_wildcards *wc, - int *mpls_depth_delta); +enum slow_path_reason commit_odp_actions(const struct flow *, + struct flow *base, + struct ofpbuf *odp_actions, + struct flow_wildcards *wc, + int *mpls_depth_delta); /* ofproto-dpif interface. * @@ -199,32 +231,4 @@ void odp_put_tunnel_action(const struct flow_tnl *tunnel, void odp_put_pkt_mark_action(const uint32_t pkt_mark, struct ofpbuf *odp_actions); -#define SLOW_PATH_REASONS \ - /* These reasons are mutually exclusive. */ \ - SPR(SLOW_CFM, "cfm", "Consists of CFM packets") \ - SPR(SLOW_BFD, "bfd", "Consists of BFD packets") \ - SPR(SLOW_LACP, "lacp", "Consists of LACP packets") \ - SPR(SLOW_STP, "stp", "Consists of STP packets") \ - SPR(SLOW_CONTROLLER, "controller", \ - "Sends \"packet-in\" messages to the OpenFlow controller") - -/* Indexes for slow-path reasons. Client code uses "enum slow_path_reason" - * values instead of these, these are just a way to construct those. */ -enum { -#define SPR(ENUM, STRING, EXPLANATION) ENUM##_INDEX, - SLOW_PATH_REASONS -#undef SPR -}; - -/* Reasons why a subfacet might not be fast-pathable. - * - * Each reason is a separate bit to allow reasons to be combined. */ -enum slow_path_reason { -#define SPR(ENUM, STRING, EXPLANATION) ENUM = 1 << ENUM##_INDEX, - SLOW_PATH_REASONS -#undef SPR -}; - -const char *slow_path_reason_to_explanation(enum slow_path_reason); - #endif /* odp-util.h */ diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 5222c11e1..954e92fac 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -817,6 +817,7 @@ handle_upcalls(struct udpif *udpif, struct list *upcalls) op->u.execute.packet = packet; op->u.execute.actions = miss->xout.odp_actions.data; op->u.execute.actions_len = miss->xout.odp_actions.size; + op->u.execute.needs_help = (miss->xout.slow & SLOW_ACTION) != 0; } } diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index f103d6a49..4fb0d5e39 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1660,9 +1660,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, } if (out_port != ODPP_NONE) { - commit_odp_actions(flow, &ctx->base_flow, - &ctx->xout->odp_actions, &ctx->xout->wc, - &ctx->mpls_depth_delta); + ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow, + &ctx->xout->odp_actions, + &ctx->xout->wc, + &ctx->mpls_depth_delta); nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port); @@ -1825,9 +1826,10 @@ execute_controller_action(struct xlate_ctx *ctx, int len, key.pkt_mark = 0; memset(&key.tunnel, 0, sizeof key.tunnel); - commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, - &ctx->xout->odp_actions, &ctx->xout->wc, - &ctx->mpls_depth_delta); + ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, + &ctx->xout->odp_actions, + &ctx->xout->wc, + &ctx->mpls_depth_delta); odp_execute_actions(NULL, packet, &key, ctx->xout->odp_actions.data, ctx->xout->odp_actions.size, NULL, NULL); @@ -2213,9 +2215,10 @@ xlate_sample_action(struct xlate_ctx *ctx, * the same percentage. */ uint32_t probability = (os->probability << 16) | os->probability; - commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, - &ctx->xout->odp_actions, &ctx->xout->wc, - &ctx->mpls_depth_delta); + ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, + &ctx->xout->odp_actions, + &ctx->xout->wc, + &ctx->mpls_depth_delta); compose_flow_sample_cookie(os->probability, os->collector_set_id, os->obs_domain_id, os->obs_point_id, &cookie); @@ -2894,7 +2897,7 @@ xlate_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet) error = dpif_execute(xport->xbridge->dpif, key.data, key.size, xout.odp_actions.data, xout.odp_actions.size, - packet); + packet, (xout.slow & SLOW_ACTION) != 0); ovs_rwlock_unlock(&xlate_rwlock); out: diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 08dec0391..61324e7f6 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -3839,7 +3839,8 @@ execute_actions(struct ofproto *ofproto_, const struct flow *flow, odp_flow_key_from_flow(&key, flow, ofp_port_to_odp_port(ofproto, in_port)); error = dpif_execute(ofproto->backer->dpif, key.data, key.size, - xout.odp_actions.data, xout.odp_actions.size, packet); + xout.odp_actions.data, xout.odp_actions.size, packet, + (xout.slow & SLOW_ACTION) != 0); xlate_out_uninit(&xout); return error; -- 2.20.1