2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
35 #include "stream-ssl.h"
39 #include "openvswitch/vlog.h"
41 VLOG_DEFINE_THIS_MODULE(ovn_northd);
/* unixctl callback that requests a clean daemon exit. */
43 static unixctl_cb_func ovn_northd_exit;
/* State threaded through one ovn-northd iteration: the IDL connection and
 * the currently open transaction (may be NULL) for each of the OVN
 * northbound (nb) and southbound (sb) databases. */
45 struct northd_context {
46 struct ovsdb_idl *ovnnb_idl;
47 struct ovsdb_idl *ovnsb_idl;
48 struct ovsdb_idl_txn *ovnnb_txn;
49 struct ovsdb_idl_txn *ovnsb_txn;
/* Connection targets for the two databases, taken from the command line
 * with default_db() as the fallback. */
52 static const char *ovnnb_db;
53 static const char *ovnsb_db;
55 static const char *default_db(void);
57 /* Pipeline stages. */
59 /* The two pipelines in an OVN logical flow table. */
61 P_IN, /* Ingress pipeline. */
62 P_OUT /* Egress pipeline. */
65 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
66 enum ovn_datapath_type {
67 DP_SWITCH, /* OVN logical switch. */
68 DP_ROUTER /* OVN logical router. */
71 /* Returns an "enum ovn_stage" built from the arguments.
73 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
74 * functions can't be used in enums or switch cases.) */
/* Stage encoding: bit 9 = datapath type, bit 8 = pipeline, low 8 bits =
 * table number within the pipeline. */
75 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
76 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
78 /* A stage within an OVN logical switch or router.
80 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
81 * or router, whether the stage is part of the ingress or egress pipeline, and
82 * the table within that pipeline. The first three components are combined to
83 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
84 * S_ROUTER_OUT_DELIVERY. */
/* X-macro list of every pipeline stage: datapath type, pipeline, stage
 * name, table number, and the external string name used in the database. */
86 #define PIPELINE_STAGES \
87 /* Logical switch ingress stages. */ \
88 PIPELINE_STAGE(SWITCH, IN, PORT_SEC, 0, "ls_in_port_sec") \
89 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 1, "ls_in_pre_acl") \
90 PIPELINE_STAGE(SWITCH, IN, ACL, 2, "ls_in_acl") \
91 PIPELINE_STAGE(SWITCH, IN, ARP_RSP, 3, "ls_in_arp_rsp") \
92 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 4, "ls_in_l2_lkup") \
94 /* Logical switch egress stages. */ \
95 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \
96 PIPELINE_STAGE(SWITCH, OUT, ACL, 1, "ls_out_acl") \
97 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC, 2, "ls_out_port_sec") \
99 /* Logical router ingress stages. */ \
100 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
101 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
102 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 2, "lr_in_ip_routing") \
103 PIPELINE_STAGE(ROUTER, IN, ARP, 3, "lr_in_arp") \
105 /* Logical router egress stages. */ \
106 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 0, "lr_out_delivery")
/* Expand PIPELINE_STAGES into the S_<TYPE>_<PIPELINE>_<STAGE> enumerators,
 * each encoded via OVN_STAGE_BUILD. */
108 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
109 S_##DP_TYPE##_##PIPELINE##_##STAGE \
110 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
112 #undef PIPELINE_STAGE
115 /* Due to various hard-coded priorities needed to implement ACLs, the
116 * northbound database supports a smaller range of ACL priorities than
117 * are available to logical flows. This value is added to an ACL
118 * priority to determine the ACL's logical flow priority. */
119 #define OVN_ACL_PRI_OFFSET 1000
121 /* Returns an "enum ovn_stage" built from the arguments. */
122 static enum ovn_stage
123 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
126 return OVN_STAGE_BUILD(dp_type, pipeline, table);
129 /* Returns the pipeline to which 'stage' belongs. */
130 static enum ovn_pipeline
131 ovn_stage_get_pipeline(enum ovn_stage stage)
133 return (stage >> 8) & 1;
136 /* Returns the table to which 'stage' belongs. */
138 ovn_stage_get_table(enum ovn_stage stage)
143 /* Returns a string name for 'stage'. */
145 ovn_stage_to_str(enum ovn_stage stage)
/* Expand PIPELINE_STAGES into one switch case per stage, mapping the
 * enumerator to its database string name. */
148 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
149 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
151 #undef PIPELINE_STAGE
152 default: return "<unknown>";
160 %s: OVN northbound management daemon\n\
161 usage: %s [OPTIONS]\n\
164 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
166 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
168 -h, --help display this help message\n\
169 -o, --options list available options\n\
170 -V, --version display version information\n\
171 ", program_name, program_name, default_db(), default_db());
/* Also print the generic database connection options (active and passive,
 * no punix default). */
174 stream_usage("database", true, true, false);
/* A set of in-use tunnel IDs, kept as an hmap of tnlid_node hashed by
 * hash_int(tnlid, 0). */
178 struct hmap_node hmap_node;
/* Frees every node in 'tnlids' and destroys the map itself. */
183 destroy_tnlids(struct hmap *tnlids)
185 struct tnlid_node *node, *next;
186 HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
187 hmap_remove(tnlids, &node->hmap_node);
190 hmap_destroy(tnlids);
/* Marks 'tnlid' as in use by inserting a node for it into 'set'.
 * (Does not check for duplicates; callers use tnlid_in_use() first.) */
194 add_tnlid(struct hmap *set, uint32_t tnlid)
196 struct tnlid_node *node = xmalloc(sizeof *node);
197 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
/* Returns true if 'tnlid' is already recorded in 'set'. */
202 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
204 const struct tnlid_node *node;
205 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
206 if (node->tnlid == tnlid) {
/* Allocates an unused tunnel ID in the range 1..max, starting the search
 * just after '*hint' and wrapping around, so consecutive calls tend to
 * hand out increasing IDs.  Logs a rate-limited warning on exhaustion;
 * 'name' identifies the ID space in that message. */
214 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
217 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
218 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
219 if (!tnlid_in_use(set, tnlid)) {
220 add_tnlid(set, tnlid);
226 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
227 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
231 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
232 * sb->external_ids:logical-switch. */
233 struct ovn_datapath {
234 struct hmap_node key_node; /* Index on 'key'. */
235 struct uuid key; /* (nbs/nbr)->header_.uuid. */
/* At most one of 'nbs' and 'nbr' is nonnull: a datapath is either a
 * logical switch or a logical router, never both. */
237 const struct nbrec_logical_switch *nbs; /* May be NULL. */
238 const struct nbrec_logical_router *nbr; /* May be NULL. */
239 const struct sbrec_datapath_binding *sb; /* May be NULL. */
241 struct ovs_list list; /* In list of similar records. */
243 /* Logical router data (digested from nbr). */
246 /* Logical switch data. */
247 struct ovn_port **router_ports;
248 size_t n_router_ports;
/* Per-datapath set of in-use port tunnel keys, plus a hint to speed up
 * allocation of the next key. */
250 struct hmap port_tnlids;
251 uint32_t port_key_hint;
/* Creates a new ovn_datapath for 'key' with the given (possibly NULL)
 * northbound and southbound rows, and indexes it into 'datapaths' by the
 * hash of 'key'.  Ownership stays with 'datapaths'. */
256 static struct ovn_datapath *
257 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
258 const struct nbrec_logical_switch *nbs,
259 const struct nbrec_logical_router *nbr,
260 const struct sbrec_datapath_binding *sb)
262 struct ovn_datapath *od = xzalloc(sizeof *od);
267 hmap_init(&od->port_tnlids);
268 od->port_key_hint = 0;
269 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
/* Removes 'od' from 'datapaths' and frees everything it owns. */
274 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
277 /* Don't remove od->list. It is used within build_datapaths() as a
278 * private list and once we've exited that function it is not safe to
280 hmap_remove(datapaths, &od->key_node);
281 destroy_tnlids(&od->port_tnlids);
282 free(od->router_ports);
/* Looks up the datapath whose key is 'uuid'; returns NULL if none. */
287 static struct ovn_datapath *
288 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
290 struct ovn_datapath *od;
292 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
293 if (uuid_equals(uuid, &od->key)) {
/* Maps a southbound Datapath_Binding back to its ovn_datapath via the
 * external-ids:logical-switch or external-ids:logical-router UUID.
 * Returns NULL if the row carries neither key or no match exists. */
300 static struct ovn_datapath *
301 ovn_datapath_from_sbrec(struct hmap *datapaths,
302 const struct sbrec_datapath_binding *sb)
306 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
307 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
310 return ovn_datapath_find(datapaths, &key);
/* Pairs up southbound Datapath_Binding rows with northbound Logical_Switch
 * and Logical_Router rows by UUID, populating 'datapaths'.  On return,
 * 'sb_only' lists datapaths that exist only in the southbound database,
 * 'nb_only' those only in the northbound database, and 'both' those found
 * in both.  Malformed or duplicate southbound rows are deleted as a side
 * effect. */
314 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
315 struct ovs_list *sb_only, struct ovs_list *nb_only,
316 struct ovs_list *both)
318 hmap_init(datapaths);
/* Pass 1: index every southbound binding, deleting rows that lack a
 * northbound UUID or duplicate one already seen. */
323 const struct sbrec_datapath_binding *sb, *sb_next;
324 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
326 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
327 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
328 ovsdb_idl_txn_add_comment(
330 "deleting Datapath_Binding "UUID_FMT" that lacks "
331 "external-ids:logical-switch and "
332 "external-ids:logical-router",
333 UUID_ARGS(&sb->header_.uuid));
334 sbrec_datapath_binding_delete(sb);
338 if (ovn_datapath_find(datapaths, &key)) {
339 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
341 &rl, "deleting Datapath_Binding "UUID_FMT" with "
342 "duplicate external-ids:logical-switch/router "UUID_FMT,
343 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
344 sbrec_datapath_binding_delete(sb);
348 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
350 list_push_back(sb_only, &od->list);
/* Pass 2: walk northbound logical switches, matching them to the indexed
 * southbound rows (moving matches to 'both') or creating fresh entries
 * on 'nb_only'. */
353 const struct nbrec_logical_switch *nbs;
354 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
355 struct ovn_datapath *od = ovn_datapath_find(datapaths,
359 list_remove(&od->list);
360 list_push_back(both, &od->list);
362 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
364 list_push_back(nb_only, &od->list);
/* Pass 3: same for northbound logical routers, additionally rejecting a
 * router whose UUID collides with a switch and validating default_gw. */
368 const struct nbrec_logical_router *nbr;
369 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
370 struct ovn_datapath *od = ovn_datapath_find(datapaths,
375 list_remove(&od->list);
376 list_push_back(both, &od->list);
379 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
381 "duplicate UUID "UUID_FMT" in OVN_Northbound",
382 UUID_ARGS(&nbr->header_.uuid));
386 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
388 list_push_back(nb_only, &od->list);
/* default_gw must be a plain host address (a /32 mask). */
392 if (nbr->default_gw) {
394 char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
395 if (error || !ip || mask != OVS_BE32_MAX) {
396 static struct vlog_rate_limit rl
397 = VLOG_RATE_LIMIT_INIT(5, 1);
398 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
/* Allocates an unused datapath tunnel key from the 24-bit key space,
 * remembering the last allocation in a static hint for the next call. */
408 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
410 static uint32_t hint;
411 return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
415 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
417 struct ovs_list sb_only, nb_only, both;
419 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
421 if (!list_is_empty(&nb_only)) {
422 /* First index the in-use datapath tunnel IDs. */
423 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
424 struct ovn_datapath *od;
425 LIST_FOR_EACH (od, list, &both) {
426 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
429 /* Add southbound record for each unmatched northbound record. */
430 LIST_FOR_EACH (od, list, &nb_only) {
431 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
436 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
438 char uuid_s[UUID_LEN + 1];
439 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
440 const char *key = od->nbs ? "logical-switch" : "logical-router";
441 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
442 sbrec_datapath_binding_set_external_ids(od->sb, &id);
444 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
446 destroy_tnlids(&dp_tnlids);
449 /* Delete southbound records without northbound matches. */
450 struct ovn_datapath *od, *next;
451 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
452 list_remove(&od->list);
453 sbrec_datapath_binding_delete(od->sb);
454 ovn_datapath_destroy(datapaths, od);
/* A logical switch port or logical router port, joined across the
 * northbound and southbound databases by name. */
459 struct hmap_node key_node; /* Index on 'key'. */
460 char *key; /* nbs->name, nbr->name, sb->logical_port. */
461 char *json_key; /* 'key', quoted for use in JSON. */
/* At most one of 'nbs' and 'nbr' is nonnull. */
463 const struct nbrec_logical_port *nbs; /* May be NULL. */
464 const struct nbrec_logical_router_port *nbr; /* May be NULL. */
465 const struct sbrec_port_binding *sb; /* May be NULL. */
467 /* Logical router port data. */
468 ovs_be32 ip, mask; /* 192.168.10.123/24. */
469 ovs_be32 network; /* 192.168.10.0. */
470 ovs_be32 bcast; /* 192.168.10.255. */
/* The port on the other side of a switch<->router connection, if any. */
472 struct ovn_port *peer;
/* The datapath this port belongs to. */
474 struct ovn_datapath *od;
476 struct ovs_list list; /* In list of similar records. */
/* Creates an ovn_port named 'key' with the given (possibly NULL) database
 * rows and indexes it into 'ports' by the hash of its name.  Also caches a
 * JSON-quoted copy of the name for use in flow match strings. */
479 static struct ovn_port *
480 ovn_port_create(struct hmap *ports, const char *key,
481 const struct nbrec_logical_port *nbs,
482 const struct nbrec_logical_router_port *nbr,
483 const struct sbrec_port_binding *sb)
485 struct ovn_port *op = xzalloc(sizeof *op);
487 struct ds json_key = DS_EMPTY_INITIALIZER;
488 json_string_escape(key, &json_key);
489 op->json_key = ds_steal_cstr(&json_key);
491 op->key = xstrdup(key);
495 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
/* Removes 'port' from 'ports' and frees what it owns. */
500 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
503 /* Don't remove port->list. It is used within build_ports() as a
504 * private list and once we've exited that function it is not safe to
506 hmap_remove(ports, &port->key_node);
507 free(port->json_key);
/* Looks up the port named 'name'; returns NULL if none. */
513 static struct ovn_port *
514 ovn_port_find(struct hmap *ports, const char *name)
518 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
519 if (!strcmp(op->key, name)) {
527 ovn_port_allocate_key(struct ovn_datapath *od)
529 return allocate_tnlid(&od->port_tnlids, "port",
530 (1u << 15) - 1, &od->port_key_hint);
/* Pairs up southbound Port_Binding rows with northbound logical ports and
 * logical router ports by name, populating 'ports'.  On return 'sb_only',
 * 'nb_only', and 'both' partition the ports the same way join_datapaths()
 * partitions datapaths.  Also digests each router port's MAC and CIDR and
 * wires up switch<->router peer pointers. */
534 join_logical_ports(struct northd_context *ctx,
535 struct hmap *datapaths, struct hmap *ports,
536 struct ovs_list *sb_only, struct ovs_list *nb_only,
537 struct ovs_list *both)
/* Pass 1: index every southbound Port_Binding. */
544 const struct sbrec_port_binding *sb;
545 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
546 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
548 list_push_back(sb_only, &op->list);
/* Pass 2: walk every datapath's northbound ports, matching them against
 * the indexed southbound bindings. */
551 struct ovn_datapath *od;
552 HMAP_FOR_EACH (od, key_node, datapaths) {
554 for (size_t i = 0; i < od->nbs->n_ports; i++) {
555 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
556 struct ovn_port *op = ovn_port_find(ports, nbs->name);
558 if (op->nbs || op->nbr) {
559 static struct vlog_rate_limit rl
560 = VLOG_RATE_LIMIT_INIT(5, 1);
561 VLOG_WARN_RL(&rl, "duplicate logical port %s",
566 list_remove(&op->list);
567 list_push_back(both, &op->list);
569 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
570 list_push_back(nb_only, &op->list);
/* Router ports: validate the MAC address and the network CIDR before
 * accepting the port. */
576 for (size_t i = 0; i < od->nbr->n_ports; i++) {
577 const struct nbrec_logical_router_port *nbr
581 if (!eth_addr_from_string(nbr->mac, &mac)) {
582 static struct vlog_rate_limit rl
583 = VLOG_RATE_LIMIT_INIT(5, 1);
584 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
589 char *error = ip_parse_masked(nbr->network, &ip, &mask);
590 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
591 static struct vlog_rate_limit rl
592 = VLOG_RATE_LIMIT_INIT(5, 1);
593 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
598 struct ovn_port *op = ovn_port_find(ports, nbr->name);
600 if (op->nbs || op->nbr) {
601 static struct vlog_rate_limit rl
602 = VLOG_RATE_LIMIT_INIT(5, 1);
603 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
608 list_remove(&op->list);
609 list_push_back(both, &op->list);
611 op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
612 list_push_back(nb_only, &op->list);
/* Cache the derived network and broadcast addresses. */
617 op->network = ip & mask;
618 op->bcast = ip | ~mask;
626 /* Connect logical router ports, and logical switch ports of type "router",
629 HMAP_FOR_EACH (op, key_node, ports) {
630 if (op->nbs && !strcmp(op->nbs->type, "router")) {
631 const char *peer_name = smap_get(&op->nbs->options, "router-port");
636 struct ovn_port *peer = ovn_port_find(ports, peer_name);
637 if (!peer || !peer->nbr) {
643 op->od->router_ports = xrealloc(
644 op->od->router_ports,
645 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
646 op->od->router_ports[op->od->n_router_ports++] = op;
647 } else if (op->nbr && op->nbr->peer) {
/* NOTE(review): this looks up the port's own name, which would find
 * 'op' itself rather than its peer.  It looks like this should
 * resolve op->nbr->peer (the peer router port) instead -- confirm
 * against the Logical_Router_Port schema. */
648 op->peer = ovn_port_find(ports, op->nbr->name);
/* Pushes 'op''s northbound configuration into its southbound Port_Binding
 * row: datapath reference, port type, options, parent port, VLAN tags, and
 * MAC addresses.  The first branch handles router ports (always written as
 * "patch" ports pointing at their peer); the later branches handle switch
 * ports, with "router"-type switch ports also written as patches. */
654 ovn_port_update_sbrec(const struct ovn_port *op)
656 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
658 sbrec_port_binding_set_type(op->sb, "patch");
/* "<error>" marks a router port whose peer could not be resolved. */
660 const char *peer = op->peer ? op->peer->key : "<error>";
661 const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
662 sbrec_port_binding_set_options(op->sb, &ids);
/* Router ports have no container parent, tag, or MAC list. */
664 sbrec_port_binding_set_parent_port(op->sb, NULL);
665 sbrec_port_binding_set_tag(op->sb, NULL, 0);
666 sbrec_port_binding_set_mac(op->sb, NULL, 0);
/* Ordinary switch port: copy the northbound type and options through. */
668 if (strcmp(op->nbs->type, "router")) {
669 sbrec_port_binding_set_type(op->sb, op->nbs->type);
670 sbrec_port_binding_set_options(op->sb, &op->nbs->options);
/* "router"-type switch port: becomes a patch port whose peer is named
 * by options:router-port. */
672 sbrec_port_binding_set_type(op->sb, "patch");
674 const char *router_port = smap_get(&op->nbs->options,
677 router_port = "<error>";
679 const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
680 sbrec_port_binding_set_options(op->sb, &ids);
682 sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
683 sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
684 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
685 op->nbs->n_addresses);
/* Reconciles the southbound Port_Binding table with the northbound
 * database: updates bindings present in both, inserts bindings (with fresh
 * per-datapath tunnel keys) for northbound-only ports, and deletes
 * southbound-only bindings. */
690 build_ports(struct northd_context *ctx, struct hmap *datapaths,
693 struct ovs_list sb_only, nb_only, both;
695 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
697 /* For logical ports that are in both databases, update the southbound
698 * record based on northbound data. Also index the in-use tunnel_keys. */
699 struct ovn_port *op, *next;
700 LIST_FOR_EACH_SAFE (op, next, list, &both) {
701 ovn_port_update_sbrec(op);
703 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
/* Advance the hint past every key already in use so new allocations
 * start above them. */
704 if (op->sb->tunnel_key > op->od->port_key_hint) {
705 op->od->port_key_hint = op->sb->tunnel_key;
709 /* Add southbound record for each unmatched northbound record. */
710 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
711 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
716 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
717 ovn_port_update_sbrec(op);
719 sbrec_port_binding_set_logical_port(op->sb, op->key);
720 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
723 /* Delete southbound records without northbound matches. */
724 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
725 list_remove(&op->list);
726 sbrec_port_binding_delete(op->sb);
727 ovn_port_destroy(ports, op);
/* Reserved key range for OVN multicast groups. */
731 #define OVN_MIN_MULTICAST 32768
732 #define OVN_MAX_MULTICAST 65535
734 struct multicast_group {
736 uint16_t key; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
/* Well-known group flooded to every enabled port (key = the maximum). */
739 #define MC_FLOOD "_MC_flood"
740 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
/* Well-known group for ports with "unknown" addresses. */
742 #define MC_UNKNOWN "_MC_unknown"
743 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
746 multicast_group_equal(const struct multicast_group *a,
747 const struct multicast_group *b)
749 return !strcmp(a->name, b->name) && a->key == b->key;
752 /* Multicast group entry. */
753 struct ovn_multicast {
754 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
755 struct ovn_datapath *datapath;
756 const struct multicast_group *group;
/* The member ports, grown on demand (see ovn_multicast_add()). */
758 struct ovn_port **ports;
759 size_t n_ports, allocated_ports;
/* Hashes a (datapath, group) pair, matching the index used by
 * ovn_multicast_find(). */
763 ovn_multicast_hash(const struct ovn_datapath *datapath,
764 const struct multicast_group *group)
766 return hash_pointer(datapath, group->key);
/* Looks up the ovn_multicast for 'group' on 'datapath'; NULL if none. */
769 static struct ovn_multicast *
770 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
771 const struct multicast_group *group)
773 struct ovn_multicast *mc;
775 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
776 ovn_multicast_hash(datapath, group), mcgroups) {
777 if (mc->datapath == datapath
778 && multicast_group_equal(mc->group, group)) {
/* Adds 'port' to the multicast 'group' on its datapath, creating the
 * ovn_multicast entry (with an initial capacity of 4 ports) on first use
 * and doubling the port array as needed. */
786 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
787 struct ovn_port *port)
789 struct ovn_datapath *od = port->od;
790 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
792 mc = xmalloc(sizeof *mc);
793 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
797 mc->allocated_ports = 4;
798 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
800 if (mc->n_ports >= mc->allocated_ports) {
801 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
804 mc->ports[mc->n_ports++] = port;
/* Removes 'mc' from 'mcgroups' and frees it. */
808 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
811 hmap_remove(mcgroups, &mc->hmap_node);
/* Writes 'mc''s member ports into the southbound Multicast_Group row 'sb'
 * as an array of Port_Binding references. */
818 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
819 const struct sbrec_multicast_group *sb)
821 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
822 for (size_t i = 0; i < mc->n_ports; i++) {
823 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
825 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
829 /* Logical flow generation.
831 * This code generates the Logical_Flow table in the southbound database, as a
832 * function of most of the northbound database.
/* One prospective Logical_Flow row, hashed by all of its fields so that
 * build code can look flows up and avoid duplicates. */
836 struct hmap_node hmap_node;
838 struct ovn_datapath *od;
839 enum ovn_stage stage;
/* Hashes every field of 'lflow': datapath key, stage+priority, match, and
 * actions -- mirrors ovn_lflow_equal(). */
846 ovn_lflow_hash(const struct ovn_lflow *lflow)
848 size_t hash = uuid_hash(&lflow->od->key);
849 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
850 hash = hash_string(lflow->match, hash);
851 return hash_string(lflow->actions, hash);
855 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
857 return (a->od == b->od
858 && a->stage == b->stage
859 && a->priority == b->priority
860 && !strcmp(a->match, b->match)
861 && !strcmp(a->actions, b->actions));
/* Initializes 'lflow' in place.  Takes ownership of 'match' and 'actions'
 * (they are stored, not copied). */
865 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
866 enum ovn_stage stage, uint16_t priority,
867 char *match, char *actions)
870 lflow->stage = stage;
871 lflow->priority = priority;
872 lflow->match = match;
873 lflow->actions = actions;
876 /* Adds a row with the specified contents to the Logical_Flow table. */
878 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
879 enum ovn_stage stage, uint16_t priority,
880 const char *match, const char *actions)
882 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
/* The hmap owns copies of 'match' and 'actions'. */
883 ovn_lflow_init(lflow, od, stage, priority,
884 xstrdup(match), xstrdup(actions));
885 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
/* Looks up a flow identical in every field; returns NULL if none. */
888 static struct ovn_lflow *
889 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
890 enum ovn_stage stage, uint16_t priority,
891 const char *match, const char *actions)
893 struct ovn_lflow target;
/* Build a stack-allocated key; the casts are safe because the target is
 * only read, never freed. */
894 ovn_lflow_init(&target, od, stage, priority,
895 CONST_CAST(char *, match), CONST_CAST(char *, actions));
897 struct ovn_lflow *lflow;
898 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
900 if (ovn_lflow_equal(lflow, &target)) {
/* Removes 'lflow' from 'lflows' and frees it. */
908 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
911 hmap_remove(lflows, &lflow->hmap_node);
913 free(lflow->actions);
/* An IPv4 address with its prefix length. */
918 struct ipv4_netaddr {
/* An IPv6 address with its prefix length. */
923 struct ipv6_netaddr {
924 struct in6_addr addr;
/* A logical port's MAC plus its parsed IPv4/IPv6 address lists. */
928 struct lport_addresses {
931 struct ipv4_netaddr *ipv4_addrs;
933 struct ipv6_netaddr *ipv6_addrs;
937 * Extracts the mac, ipv4 and ipv6 addresses from the input param 'address'
938 * which should be of the format 'MAC [IP1 IP2 ..]' where IPn should be
939 * a valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
940 * 'ipv6_addrs' fields of input param 'laddrs'.
941 * The caller has to free the 'ipv4_addrs' and 'ipv6_addrs' fields.
942 * If input param 'store_ipv6' is true only then extracted ipv6 addresses
943 * are stored in 'ipv6_addrs' fields.
944 * Return true if at least 'MAC' is found in 'address', false otherwise.
946 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
947 * 30.0.0.3/23' and 'store_ipv6' = true
948 * then returns true with laddrs->n_ipv4_addrs = 2, laddrs->n_ipv6_addrs = 1.
951 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
952 * 30.0.0.3/23' and 'store_ipv6' = false
953 * then returns true with laddrs->n_ipv4_addrs = 2, laddrs->n_ipv6_addrs = 0.
955 * Eg 3. If 'address' = '00:00:00:00:00:01 10.0.0.4 addr 30.0.0.4', then
956 * returns true with laddrs->n_ipv4_addrs = 1 and laddrs->n_ipv6_addrs = 0.
959 extract_lport_addresses(char *address, struct lport_addresses *laddrs,
/* A leading MAC address is mandatory; bail out if it does not scan. */
964 char *buf_end = buf + strlen(address);
965 if (!ovs_scan_len(buf, &buf_index, ETH_ADDR_SCAN_FMT,
966 ETH_ADDR_SCAN_ARGS(laddrs->ea))) {
975 laddrs->n_ipv4_addrs = 0;
976 laddrs->n_ipv6_addrs = 0;
977 laddrs->ipv4_addrs = NULL;
978 laddrs->ipv6_addrs = NULL;
980 /* Loop through the buffer and extract the IPv4/IPv6 addresses
981 * and store in the 'laddrs'. Break the loop if invalid data is found.
984 while (buf < buf_end) {
/* Try IPv4 with optional /prefix first... */
986 error = ip_parse_cidr_len(buf, &buf_index, &ip4, &plen);
988 laddrs->n_ipv4_addrs++;
989 laddrs->ipv4_addrs = xrealloc(
991 sizeof (struct ipv4_netaddr) * laddrs->n_ipv4_addrs);
992 laddrs->ipv4_addrs[laddrs->n_ipv4_addrs - 1].addr = ip4;
993 laddrs->ipv4_addrs[laddrs->n_ipv4_addrs - 1].plen = plen;
/* ...then IPv6, which is only recorded when 'store_ipv6' is set. */
998 error = ipv6_parse_cidr_len(buf, &buf_index, &ip6, &plen);
999 if (!error && store_ipv6) {
1000 laddrs->n_ipv6_addrs++;
1001 laddrs->ipv6_addrs = xrealloc(
1003 sizeof(struct ipv6_netaddr) * laddrs->n_ipv6_addrs);
1004 memcpy(&laddrs->ipv6_addrs[laddrs->n_ipv6_addrs - 1].addr, &ip6,
1005 sizeof(struct in6_addr));
1006 laddrs->ipv6_addrs[laddrs->n_ipv6_addrs - 1].plen = plen;
/* Neither family parsed: log once (rate-limited) and stop scanning. */
1010 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1011 VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
1021 /* Appends port security constraints on L2 address field 'eth_addr_field'
1022 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
1023 * 'n_port_security' elements, is the collection of port_security constraints
1024 * from an OVN_NB Logical_Port row. */
1026 build_port_security(const char *eth_addr_field,
1027 char **port_security, size_t n_port_security,
/* Remember where we started so the clause can be rolled back if no
 * valid MAC was appended. */
1030 size_t base_len = match->length;
1031 ds_put_format(match, " && %s == {", eth_addr_field);
/* Append each syntactically valid MAC; silently skip malformed entries. */
1034 for (size_t i = 0; i < n_port_security; i++) {
1037 if (eth_addr_from_string(port_security[i], &ea)) {
1038 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
1039 ds_put_char(match, ' ');
1043 ds_chomp(match, ' ');
1044 ds_put_cstr(match, "}");
/* No valid addresses: truncate 'match' back to its original length. */
1047 match->length = base_len;
1052 lport_is_enabled(const struct nbrec_logical_port *lport)
1054 return !lport->enabled || *lport->enabled;
1058 lport_is_up(const struct nbrec_logical_port *lport)
1060 return !lport->up || *lport->up;
/* Returns true if any ACL on logical switch 'od' uses the stateful
 * "allow-related" action, which forces conntrack-based processing. */
1064 has_stateful_acl(struct ovn_datapath *od)
1066 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1067 struct nbrec_acl *acl = od->nbs->acls[i];
1068 if (!strcmp(acl->action, "allow-related")) {
/* Emits the pre-ACL and ACL stage flows for logical switch 'od' into
 * 'lflows': default-allow baselines, conntrack plumbing when any stateful
 * ("allow-related") ACL exists, and one flow per northbound ACL at
 * priority acl->priority + OVN_ACL_PRI_OFFSET. */
1077 build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
1079 bool has_stateful = has_stateful_acl(od);
1080 struct ovn_port *op;
1081 struct ds match_in, match_out;
1083 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1084 * allowed by default. */
1085 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
1086 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
1088 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1089 * default. A related rule at priority 1 is added below if there
1090 * are any stateful ACLs in this datapath. */
1091 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
1092 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
1094 /* If there are any stateful ACL rules in this datapath, we must
1095 * send all IP packets through the conntrack action, which handles
1096 * defragmentation, in order to match L4 headers. */
/* Exempt router-attached ports from conntrack (priority 110 beats the
 * priority-100 ct_next flows below). */
1098 HMAP_FOR_EACH (op, key_node, ports) {
1099 if (op->od == od && !strcmp(op->nbs->type, "router")) {
1100 /* Can't use ct() for router ports. Consider the following configuration:
1101 lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB,
1102 For a ping from lp1 to lp2, First, the response will go through ct()
1103 with a zone for lp2 in the ls2 ingress pipeline on hostB.
1104 That ct zone knows about this connection. Next, it goes through ct()
1105 with the zone for the router port in the egress pipeline of ls2 on hostB.
1106 This zone does not know about the connection, as the icmp request
1107 went through the logical router on hostA, not hostB. This would only work
1108 with distributed conntrack state across all chassis. */
1111 ds_init(&match_out);
1112 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1113 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
1114 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, ds_cstr(&match_in), "next;");
1115 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ds_cstr(&match_out), "next;");
1117 ds_destroy(&match_in);
1118 ds_destroy(&match_out);
1122 /* Ingress and Egress Pre-ACL Table (Priority 100).
1124 * Regardless of whether the ACL is "from-lport" or "to-lport",
1125 * we need rules in both the ingress and egress table, because
1126 * the return traffic needs to be followed. */
1127 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
1128 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
1130 /* Ingress and Egress ACL Table (Priority 1).
1132 * By default, traffic is allowed. This is partially handled by
1133 * the Priority 0 ACL flows added earlier, but we also need to
1134 * commit IP flows. This is because, while the initiator's
1135 * direction may not have any stateful rules, the server's may
1136 * and then its return traffic would not have an associated
1137 * conntrack entry and would return "+invalid". */
1138 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1139 "ct_commit; next;");
1140 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1141 "ct_commit; next;");
1143 /* Ingress and Egress ACL Table (Priority 65535).
1145 * Always drop traffic that's in an invalid state. This is
1146 * enforced at a higher priority than ACLs can be defined. */
1147 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1149 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1152 /* Ingress and Egress ACL Table (Priority 65535).
1154 * Always allow traffic that is established to a committed
1155 * conntrack entry. This is enforced at a higher priority than
1156 * ACLs can be defined. */
1157 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1158 "ct.est && !ct.rel && !ct.new && !ct.inv",
1160 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1161 "ct.est && !ct.rel && !ct.new && !ct.inv",
1164 /* Ingress and Egress ACL Table (Priority 65535).
1166 * Always allow traffic that is related to an existing conntrack
1167 * entry. This is enforced at a higher priority than ACLs can
1170 * NOTE: This does not support related data sessions (eg,
1171 * a dynamically negotiated FTP data channel), but will allow
1172 * related traffic such as an ICMP Port Unreachable through
1173 * that's generated from a non-listening UDP port. */
1174 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1175 "!ct.est && ct.rel && !ct.new && !ct.inv",
1177 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1178 "!ct.est && ct.rel && !ct.new && !ct.inv",
1182 /* Ingress or Egress ACL Table (Various priorities). */
1183 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1184 struct nbrec_acl *acl = od->nbs->acls[i];
1185 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
1186 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1188 if (!strcmp(acl->action, "allow")) {
1189 /* If there are any stateful flows, we must even commit "allow"
1190 * actions. This is because, while the initiator's
1191 * direction may not have any stateful rules, the server's
1192 * may and then its return traffic would not have an
1193 * associated conntrack entry and would return "+invalid". */
1194 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1195 ovn_lflow_add(lflows, od, stage,
1196 acl->priority + OVN_ACL_PRI_OFFSET,
1197 acl->match, actions);
1198 } else if (!strcmp(acl->action, "allow-related")) {
1199 struct ds match = DS_EMPTY_INITIALIZER;
1201 /* Commit the connection tracking entry, which allows all
1202 * other traffic related to this entry to flow due to the
1203 * 65535 priority flow defined earlier. */
1204 ds_put_format(&match, "ct.new && (%s)", acl->match);
1205 ovn_lflow_add(lflows, od, stage,
1206 acl->priority + OVN_ACL_PRI_OFFSET,
1207 ds_cstr(&match), "ct_commit; next;");
1210 } else if (!strcmp(acl->action, "drop")) {
1211 ovn_lflow_add(lflows, od, stage,
1212 acl->priority + OVN_ACL_PRI_OFFSET,
1213 acl->match, "drop;");
1214 } else if (!strcmp(acl->action, "reject")) {
1215 /* xxx Need to support "reject". */
1216 VLOG_INFO("reject is not a supported action");
/* Fall back to "drop" until reject is implemented. */
1217 ovn_lflow_add(lflows, od, stage,
1218 acl->priority + OVN_ACL_PRI_OFFSET,
1219 acl->match, "drop;");
/* Builds the logical flows and multicast groups for every logical switch
 * datapath in 'datapaths' and logical switch port in 'ports': ACL tables
 * (via build_acls()), ingress admission control and port security, the ARP
 * responder, L2 destination lookup (including the MC_FLOOD and MC_UNKNOWN
 * multicast groups), and egress port security.  Results accumulate into
 * 'lflows' and 'mcgroups'.
 *
 * NOTE(review): interior lines of this function are elided in this view;
 * comments below describe only the visible statements. */
1225 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1226 struct hmap *lflows, struct hmap *mcgroups)
1228 /* This flow table structure is documented in ovn-northd(8), so please
1229 * update ovn-northd.8.xml if you change anything. */
1231 /* Build pre-ACL and ACL tables for both ingress and egress.
1232 * Ingress tables 1 and 2. Egress tables 0 and 1. */
1233 struct ovn_datapath *od;
1234 HMAP_FOR_EACH (od, key_node, datapaths) {
1239 build_acls(od, lflows, ports);
1242 /* Logical switch ingress table 0: Admission control framework (priority
1244 HMAP_FOR_EACH (od, key_node, datapaths) {
1249 /* Logical VLANs not supported. */
1250 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
1253 /* Broadcast/multicast source address is invalid. */
1254 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
1257 /* Port security flows have priority 50 (see below) and will continue
1258 * to the next table if packet source is acceptable. */
1261 /* Logical switch ingress table 0: Ingress port security (priority 50). */
1262 struct ovn_port *op;
1263 HMAP_FOR_EACH (op, key_node, ports) {
1268 if (!lport_is_enabled(op->nbs)) {
1269 /* Drop packets from disabled logical ports (since logical flow
1270 * tables are default-drop). */
1274 struct ds match = DS_EMPTY_INITIALIZER;
1275 ds_put_format(&match, "inport == %s", op->json_key);
1276 build_port_security("eth.src",
1277 op->nbs->port_security, op->nbs->n_port_security,
1279 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
1280 ds_cstr(&match), "next;");
1284 /* Ingress table 3: ARP responder, skip requests coming from localnet ports.
1285 * (priority 100). */
1286 HMAP_FOR_EACH (op, key_node, ports) {
1291 if (!strcmp(op->nbs->type, "localnet")) {
1292 char *match = xasprintf("inport == %s", op->json_key);
1293 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 100,
1299 /* Ingress table 3: ARP responder, reply for known IPs.
1301 HMAP_FOR_EACH (op, key_node, ports) {
1307 * Add ARP reply flows if either the
1309 * - port type is router
1311 if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
/* Parse each entry of the NB "addresses" column and emit one ARP-reply
 * flow per IPv4 address it yields. */
1315 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1316 struct lport_addresses laddrs;
1317 if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
1321 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
1322 char *match = xasprintf(
1323 "arp.tpa == "IP_FMT" && arp.op == 1",
1324 IP_ARGS(laddrs.ipv4_addrs[j].addr));
/* Turn the ARP request around in place: swap Ethernet addresses,
 * fill in this port's MAC/IP, and send back out the arrival port. */
1325 char *actions = xasprintf(
1326 "eth.dst = eth.src; "
1327 "eth.src = "ETH_ADDR_FMT"; "
1328 "arp.op = 2; /* ARP reply */ "
1329 "arp.tha = arp.sha; "
1330 "arp.sha = "ETH_ADDR_FMT"; "
1331 "arp.tpa = arp.spa; "
1332 "arp.spa = "IP_FMT"; "
1333 "outport = inport; "
1334 "inport = \"\"; /* Allow sending out inport. */ "
1336 ETH_ADDR_ARGS(laddrs.ea),
1337 ETH_ADDR_ARGS(laddrs.ea),
1338 IP_ARGS(laddrs.ipv4_addrs[j].addr));
1339 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 50,
1345 free(laddrs.ipv4_addrs);
1349 /* Ingress table 3: ARP responder, by default goto next.
1351 HMAP_FOR_EACH (od, key_node, datapaths) {
1356 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_RSP, 0, "1", "next;");
1359 /* Ingress table 4: Destination lookup, broadcast and multicast handling
1360 * (priority 100). */
1361 HMAP_FOR_EACH (op, key_node, ports) {
1366 if (lport_is_enabled(op->nbs)) {
1367 ovn_multicast_add(mcgroups, &mc_flood, op);
1370 HMAP_FOR_EACH (od, key_node, datapaths) {
1375 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1376 "outport = \""MC_FLOOD"\"; output;");
1379 /* Ingress table 4: Destination lookup, unicast handling (priority 50), */
1380 HMAP_FOR_EACH (op, key_node, ports) {
1385 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1386 struct eth_addr mac;
1388 if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1389 struct ds match, actions;
1392 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1393 ETH_ADDR_ARGS(mac));
1396 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1397 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1398 ds_cstr(&match), ds_cstr(&actions));
1399 ds_destroy(&actions);
1401 } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
/* "unknown" address: traffic to unlearned MACs on this datapath
 * floods to this port via the MC_UNKNOWN group (see priority-0
 * flow below). */
1402 if (lport_is_enabled(op->nbs)) {
1403 ovn_multicast_add(mcgroups, &mc_unknown, op);
1404 op->od->has_unknown = true;
/* Rate-limited warning for unparseable "addresses" entries. */
1407 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1410 "%s: invalid syntax '%s' in addresses column",
1411 op->nbs->name, op->nbs->addresses[i]);
1416 /* Ingress table 4: Destination lookup for unknown MACs (priority 0). */
1417 HMAP_FOR_EACH (od, key_node, datapaths) {
1422 if (od->has_unknown) {
1423 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1424 "outport = \""MC_UNKNOWN"\"; output;");
1428 /* Egress table 2: Egress port security multicast/broadcast (priority
1430 HMAP_FOR_EACH (od, key_node, datapaths) {
1435 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
1439 /* Egress table 2: Egress port security (priorities 50 and 150).
1441 * Priority 50 rules implement port security for enabled logical port.
1443 * Priority 150 rules drop packets to disabled logical ports, so that they
1444 * don't even receive multicast or broadcast packets. */
1445 HMAP_FOR_EACH (op, key_node, ports) {
1450 struct ds match = DS_EMPTY_INITIALIZER;
1451 ds_put_format(&match, "outport == %s", op->json_key);
1452 if (lport_is_enabled(op->nbs)) {
1453 build_port_security("eth.dst", op->nbs->port_security,
1454 op->nbs->n_port_security, &match);
1455 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
1456 ds_cstr(&match), "output;");
1458 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
1459 ds_cstr(&match), "drop;");
/* Returns true if the logical router port is administratively enabled.
 * An absent (NULL) 'enabled' column defaults to enabled. */
1467 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1469 return !lrport->enabled || *lrport->enabled;
/* Adds an IP-routing logical flow to 'lflows' for datapath 'od' matching
 * destinations in 'network'/'mask'.  The flow decrements ip.ttl and loads
 * the next-hop address into reg0: either 'gateway' or the packet's own
 * ip4.dst (the selecting condition is on an elided line; callers pass
 * gateway == 0 for directly connected networks — confirm against full
 * source).  The flow priority is the prefix length, which yields
 * longest-prefix-match ordering among routes. */
1473 add_route(struct hmap *lflows, struct ovn_datapath *od,
1474 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1476 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1477 IP_ARGS(network), IP_ARGS(mask));
1479 struct ds actions = DS_EMPTY_INITIALIZER;
1480 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
1482 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1484 ds_put_cstr(&actions, "ip4.dst");
1486 ds_put_cstr(&actions, "; next;");
1488 /* The priority here is calculated to implement longest-prefix-match
1490 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
1491 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1492 ds_destroy(&actions);
/* Builds the logical flows for every logical router datapath in
 * 'datapaths' and logical router port in 'ports': admission control, IP
 * input (sanity drops plus ICMP echo and ARP replies for the router's own
 * addresses), IP routing (via add_route()), ARP resolution, and egress
 * delivery.  Results accumulate into 'lflows'.
 *
 * NOTE(review): interior lines of this function are elided in this view;
 * comments below describe only the visible statements. */
1497 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1498 struct hmap *lflows)
1500 /* This flow table structure is documented in ovn-northd(8), so please
1501 * update ovn-northd.8.xml if you change anything. */
1503 /* Logical router ingress table 0: Admission control framework. */
1504 struct ovn_datapath *od;
1505 HMAP_FOR_EACH (od, key_node, datapaths) {
1510 /* Logical VLANs not supported.
1511 * Broadcast/multicast source address is invalid. */
1512 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1513 "vlan.present || eth.src[40]", "drop;");
1516 /* Logical router ingress table 0: match (priority 50). */
1517 struct ovn_port *op;
1518 HMAP_FOR_EACH (op, key_node, ports) {
1523 if (!lrport_is_enabled(op->nbr)) {
1524 /* Drop packets from disabled logical ports (since logical flow
1525 * tables are default-drop). */
/* Accept only multicast or frames addressed to this router port's MAC. */
1529 char *match = xasprintf(
1530 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1531 ETH_ADDR_ARGS(op->mac), op->json_key);
1532 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1537 /* Logical router ingress table 1: IP Input. */
1538 HMAP_FOR_EACH (od, key_node, datapaths) {
1543 /* L3 admission control: drop multicast and broadcast source, localhost
1544 * source or destination, and zero network source or destination
1545 * (priority 100). */
1546 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1548 "ip4.src == 255.255.255.255 || "
1549 "ip4.src == 127.0.0.0/8 || "
1550 "ip4.dst == 127.0.0.0/8 || "
1551 "ip4.src == 0.0.0.0/8 || "
1552 "ip4.dst == 0.0.0.0/8",
1555 /* Drop Ethernet local broadcast. By definition this traffic should
1556 * not be forwarded.*/
1557 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1558 "eth.bcast", "drop;");
1560 /* Drop IP multicast. */
1561 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1562 "ip4.mcast", "drop;");
/* TTL exhaustion: drop packets whose TTL is 0 or 1. */
1566 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1567 char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1568 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1571 /* Pass other traffic not already handled to the next table for
1573 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
1576 HMAP_FOR_EACH (op, key_node, ports) {
1581 /* L3 admission control: drop packets that originate from an IP address
1582 * owned by the router or a broadcast address known to the router
1583 * (priority 100). */
1584 char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1585 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1586 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1590 /* ICMP echo reply. These flows reply to ICMP echo requests
1591 * received for the router's IP address. */
1593 "inport == %s && (ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
1594 "icmp4.type == 8 && icmp4.code == 0",
1595 op->json_key, IP_ARGS(op->ip), IP_ARGS(op->bcast));
1596 char *actions = xasprintf(
1597 "ip4.dst = ip4.src; "
1598 "ip4.src = "IP_FMT"; "
1601 "inport = \"\"; /* Allow sending out inport. */ "
1604 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1609 /* ARP reply. These flows reply to ARP requests for the router's own
/* Same request-in-place rewrite as the switch ARP responder: swap
 * Ethernet addresses and answer with this router port's MAC/IP. */
1612 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1613 op->json_key, IP_ARGS(op->ip));
1614 actions = xasprintf(
1615 "eth.dst = eth.src; "
1616 "eth.src = "ETH_ADDR_FMT"; "
1617 "arp.op = 2; /* ARP reply */ "
1618 "arp.tha = arp.sha; "
1619 "arp.sha = "ETH_ADDR_FMT"; "
1620 "arp.tpa = arp.spa; "
1621 "arp.spa = "IP_FMT"; "
1623 "inport = \"\"; /* Allow sending out inport. */ "
1625 ETH_ADDR_ARGS(op->mac),
1626 ETH_ADDR_ARGS(op->mac),
1629 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1634 /* Drop IP traffic to this router. */
1635 match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1636 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1641 /* Logical router ingress table 2: IP Routing.
1643 * A packet that arrives at this table is an IP packet that should be
1644 * routed to the address in ip4.dst. This table sets reg0 to the next-hop
1645 * IP address (leaving ip4.dst, the packet’s final destination, unchanged)
1646 * and advances to the next table for ARP resolution. */
1647 HMAP_FOR_EACH (op, key_node, ports) {
/* Directly connected route: gateway of 0 routes via ip4.dst itself
 * (see add_route()). */
1652 add_route(lflows, op->od, op->network, op->mask, 0);
1654 HMAP_FOR_EACH (od, key_node, datapaths) {
/* Default route (0.0.0.0/0) through the router's configured gateway. */
1660 add_route(lflows, od, 0, 0, od->gateway);
1663 /* XXX destination unreachable */
1665 /* Local router ingress table 3: ARP Resolution.
1667 * Any packet that reaches this table is an IP packet whose next-hop IP
1668 * address is in reg0. (ip4.dst is the final destination.) This table
1669 * resolves the IP address in reg0 into an output port in outport and an
1670 * Ethernet address in eth.dst. */
1671 HMAP_FOR_EACH (op, key_node, ports) {
1673 /* XXX ARP for neighboring router */
1674 } else if (op->od->n_router_ports) {
/* Switch port attached to a datapath that has router ports: for each
 * IPv4 address on this port, pre-populate ARP resolution on the peer
 * router so no dynamic ARP is needed for reg0 == that address. */
1675 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1676 struct lport_addresses laddrs;
1677 if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
1682 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
1683 ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
1684 for (size_t j = 0; j < op->od->n_router_ports; j++) {
1685 /* Get the Logical_Router_Port that the Logical_Port is
1686 * connected to, as 'peer'. */
1687 const char *peer_name = smap_get(
1688 &op->od->router_ports[j]->nbs->options,
1694 struct ovn_port *peer
1695 = ovn_port_find(ports, peer_name);
1696 if (!peer || !peer->nbr) {
1700 /* Make sure that 'ip' is in 'peer''s network. */
1701 if ((ip ^ peer->network) & peer->mask) {
1705 char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
1706 char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
1707 "eth.dst = "ETH_ADDR_FMT"; "
1710 ETH_ADDR_ARGS(peer->mac),
1711 ETH_ADDR_ARGS(laddrs.ea),
1713 ovn_lflow_add(lflows, peer->od,
1714 S_ROUTER_IN_ARP, 200, match, actions);
1721 free(laddrs.ipv4_addrs);
1726 /* Logical router egress table 0: Delivery (priority 100).
1728 * Priority 100 rules deliver packets to enabled logical ports. */
1729 HMAP_FOR_EACH (op, key_node, ports) {
1734 if (!lrport_is_enabled(op->nbr)) {
1735 /* Drop packets to disabled logical ports (since logical flow
1736 * tables are default-drop). */
1740 char *match = xasprintf("outport == %s", op->json_key);
1741 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
1747 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
1748 * constructing their contents based on the OVN_NB database. */
1750 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
/* Desired state is computed into two local hmaps, then reconciled against
 * the rows currently in the southbound database. */
1753 struct hmap lflows = HMAP_INITIALIZER(&lflows);
1754 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
1756 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
1757 build_lrouter_flows(datapaths, ports, &lflows);
1759 /* Push changes to the Logical_Flow table to database. */
/* Reconcile pass 1: walk existing SB rows.  A row whose datapath no
 * longer exists, or that matches no desired flow, is deleted; a row that
 * matches a desired flow causes that flow to be dropped from 'lflows' so
 * that only genuinely new flows remain for insertion below. */
1760 const struct sbrec_logical_flow *sbflow, *next_sbflow;
1761 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1762 struct ovn_datapath *od
1763 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
1765 sbrec_logical_flow_delete(sbflow);
1769 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
1770 enum ovn_pipeline pipeline
1771 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
1772 struct ovn_lflow *lflow = ovn_lflow_find(
1773 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
1774 sbflow->priority, sbflow->match, sbflow->actions);
1776 ovn_lflow_destroy(&lflows, lflow);
1778 sbrec_logical_flow_delete(sbflow);
/* Reconcile pass 2: whatever is left in 'lflows' is new; insert it. */
1781 struct ovn_lflow *lflow, *next_lflow;
1782 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1783 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
1784 uint8_t table = ovn_stage_get_table(lflow->stage);
1786 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1787 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1788 sbrec_logical_flow_set_pipeline(
1789 sbflow, pipeline == P_IN ? "ingress" : "egress");
1790 sbrec_logical_flow_set_table_id(sbflow, table);
1791 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1792 sbrec_logical_flow_set_match(sbflow, lflow->match);
1793 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
/* Record the human-readable stage name in external_ids for debugging. */
1795 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
1796 ovn_stage_to_str(lflow->stage));
1797 sbrec_logical_flow_set_external_ids(sbflow, &ids);
1799 ovn_lflow_destroy(&lflows, lflow);
1801 hmap_destroy(&lflows);
1803 /* Push changes to the Multicast_Group table to database. */
/* Same reconcile-then-insert pattern for Multicast_Group rows: update
 * rows that still correspond to a desired group, delete stale ones, then
 * insert the groups that remain in 'mcgroups'. */
1804 const struct sbrec_multicast_group *sbmc, *next_sbmc;
1805 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1806 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1809 sbrec_multicast_group_delete(sbmc);
1813 struct multicast_group group = { .name = sbmc->name,
1814 .key = sbmc->tunnel_key };
1815 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
1817 ovn_multicast_update_sbrec(mc, sbmc);
1818 ovn_multicast_destroy(&mcgroups, mc);
1820 sbrec_multicast_group_delete(sbmc);
1823 struct ovn_multicast *mc, *next_mc;
1824 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1825 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1826 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1827 sbrec_multicast_group_set_name(sbmc, mc->group->name);
1828 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1829 ovn_multicast_update_sbrec(mc, sbmc);
1830 ovn_multicast_destroy(&mcgroups, mc);
1832 hmap_destroy(&mcgroups);
/* Reacts to a change in the OVN northbound database: recomputes the
 * datapath, port, and logical-flow state and pushes it to the southbound
 * database via build_lflows(), then frees the temporary hmaps.  Bails out
 * early when there is no open southbound transaction to write into. */
1836 ovnnb_db_run(struct northd_context *ctx)
1838 if (!ctx->ovnsb_txn) {
1841 VLOG_DBG("ovn-nb db contents may have changed.");
1842 struct hmap datapaths, ports;
1843 build_datapaths(ctx, &datapaths);
1844 build_ports(ctx, &datapaths, &ports);
1845 build_lflows(ctx, &datapaths, &ports);
/* Tear down the transient state built above. */
1847 struct ovn_datapath *dp, *next_dp;
1848 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1849 ovn_datapath_destroy(&datapaths, dp);
1851 hmap_destroy(&datapaths);
1853 struct ovn_port *port, *next_port;
1854 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1855 ovn_port_destroy(&ports, port);
1857 hmap_destroy(&ports);
/* Reacts to a change in the OVN southbound database: recomputes the 'up'
 * state of each northbound Logical_Port from the 'chassis' column of its
 * southbound Port_Binding, using a temporary hash table keyed by port
 * name.  Bails out early when there is no open northbound transaction. */
1861 * The only change we get notified about is if the 'chassis' column of the
1862 * 'Port_Binding' table changes. When this column is not empty, it means we
1863 * need to set the corresponding logical port as 'up' in the northbound DB.
1866 ovnsb_db_run(struct northd_context *ctx)
1868 if (!ctx->ovnnb_txn) {
1871 struct hmap lports_hmap;
1872 const struct sbrec_port_binding *sb;
1873 const struct nbrec_logical_port *nb;
/* Map from logical-port name hash to its NB Logical_Port row, so each
 * Port_Binding can find its NB counterpart without a linear scan. */
1875 struct lport_hash_node {
1876 struct hmap_node node;
1877 const struct nbrec_logical_port *nb;
1878 } *hash_node, *hash_node_next;
1880 VLOG_DBG("Recalculating port up states for ovn-nb db.");
1882 hmap_init(&lports_hmap);
1884 NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1885 hash_node = xzalloc(sizeof *hash_node);
1887 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1890 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
1892 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1893 hash_string(sb->logical_port, 0),
1895 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1902 /* The logical port doesn't exist for this port binding. This can
1903 * happen under normal circumstances when ovn-northd hasn't gotten
1904 * around to pruning the Port_Binding yet. */
/* Update NB 'up' only when it would actually change: bound chassis with
 * 'up' unset/false, or no chassis with 'up' unset/true.
 * NOTE(review): the 'up' bool passed below is declared on lines elided
 * from this view — presumably true in the first branch and false in the
 * second; confirm against the full source. */
1908 if (sb->chassis && (!nb->up || !*nb->up)) {
1910 nbrec_logical_port_set_up(nb, &up, 1);
1911 } else if (!sb->chassis && (!nb->up || *nb->up)) {
1913 nbrec_logical_port_set_up(nb, &up, 1);
/* Free the temporary lookup table. */
1917 HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1918 hmap_remove(&lports_hmap, &hash_node->node);
1921 hmap_destroy(&lports_hmap);
/* Lazily-built connection string for the default OVSDB socket,
 * "unix:<rundir>/db.sock"; owned and returned by default_db(). */
1925 static char *default_db_;
1931 default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
/* Parses ovn-northd's command-line options, setting the file-scope
 * 'ovnnb_db' and 'ovnsb_db' connection strings (falling back to
 * default_db() when not given) and handling the shared daemon, VLOG, and
 * stream-SSL option groups. */
1937 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
1940 DAEMON_OPTION_ENUMS,
1943 static const struct option long_options[] = {
1944 {"ovnsb-db", required_argument, NULL, 'd'},
1945 {"ovnnb-db", required_argument, NULL, 'D'},
1946 {"help", no_argument, NULL, 'h'},
1947 {"options", no_argument, NULL, 'o'},
1948 {"version", no_argument, NULL, 'V'},
1949 DAEMON_LONG_OPTIONS,
1951 STREAM_SSL_LONG_OPTIONS,
/* Derive the getopt short-option string from the long options table. */
1954 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1959 c = getopt_long(argc, argv, short_options, long_options, NULL);
1965 DAEMON_OPTION_HANDLERS;
1966 VLOG_OPTION_HANDLERS;
1967 STREAM_SSL_OPTION_HANDLERS;
1982 ovs_cmdl_print_options(long_options);
1986 ovs_print_version(0, 0);
/* Fall back to the default local socket when a db wasn't specified. */
1995 ovnsb_db = default_db();
1999 ovnnb_db = default_db();
2002 free(short_options);
/* Registers 'column' for replication in 'idl' but suppresses change
 * alerts for it: ovn-northd writes these columns itself, so it does not
 * need to be woken up when they change. */
2006 add_column_noalert(struct ovsdb_idl *idl,
2007 const struct ovsdb_idl_column *column)
2009 ovsdb_idl_add_column(idl, column);
2010 ovsdb_idl_omit_alert(idl, column);
/* Daemon entry point: parses options, daemonizes, starts the unixctl
 * server (registering the "exit" command), creates IDL loops for the
 * northbound and southbound databases, and runs the main poll loop until
 * asked to exit, then tears everything down.
 *
 * NOTE(review): several lines of this function are elided in this view
 * (e.g. error handling after unixctl_server_create() and the per-iteration
 * calls into the db_run functions); comments describe only what is
 * visible. */
2014 main(int argc, char *argv[])
2016 int res = EXIT_SUCCESS;
2017 struct unixctl_server *unixctl;
2021 fatal_ignore_sigpipe();
2022 set_program_name(argv[0]);
2023 service_start(&argc, &argv);
2024 parse_options(argc, argv);
2026 daemonize_start(false);
2028 retval = unixctl_server_create(NULL, &unixctl);
2032 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
2034 daemonize_complete();
2039 /* We want to detect all changes to the ovn-nb db. */
2040 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
2041 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
/* The SB IDL does not monitor everything by default (third argument is
 * false); only the tables/columns registered below are replicated. */
2043 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
2044 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
2046 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
2047 add_column_noalert(ovnsb_idl_loop.idl,
2048 &sbrec_logical_flow_col_logical_datapath);
2049 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
2050 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
2051 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
2052 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
2053 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
2055 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
2056 add_column_noalert(ovnsb_idl_loop.idl,
2057 &sbrec_multicast_group_col_datapath);
2058 add_column_noalert(ovnsb_idl_loop.idl,
2059 &sbrec_multicast_group_col_tunnel_key);
2060 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
2061 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
2063 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
2064 add_column_noalert(ovnsb_idl_loop.idl,
2065 &sbrec_datapath_binding_col_tunnel_key);
2066 add_column_noalert(ovnsb_idl_loop.idl,
2067 &sbrec_datapath_binding_col_external_ids);
2069 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
2070 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
2071 add_column_noalert(ovnsb_idl_loop.idl,
2072 &sbrec_port_binding_col_logical_port);
2073 add_column_noalert(ovnsb_idl_loop.idl,
2074 &sbrec_port_binding_col_tunnel_key);
2075 add_column_noalert(ovnsb_idl_loop.idl,
2076 &sbrec_port_binding_col_parent_port);
2077 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
2078 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
2079 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
2080 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
/* 'chassis' is registered WITH alerts: ovnsb_db_run() reacts to chassis
 * changes by updating NB Logical_Port 'up' states. */
2081 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
/* Per-iteration context: running each IDL loop yields the open
 * transaction (or NULL) that the db_run functions write into. */
2086 struct northd_context ctx = {
2087 .ovnnb_idl = ovnnb_idl_loop.idl,
2088 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
2089 .ovnsb_idl = ovnsb_idl_loop.idl,
2090 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
2096 unixctl_server_run(unixctl);
2097 unixctl_server_wait(unixctl);
2099 poll_immediate_wake();
2101 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
2102 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
2105 if (should_service_stop()) {
/* Shutdown: release the unixctl server and both IDL loops. */
2110 unixctl_server_destroy(unixctl);
2111 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
2112 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
2120 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2121 const char *argv[] OVS_UNUSED, void *exiting_)
2123 bool *exiting = exiting_;
2126 unixctl_command_reply(conn, NULL);