ovn: Support multiple router ports per logical switch.
ovn/northd/ovn-northd.c (cascardo/ovs.git)
1 /*
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use this file except in compliance with the License.
4  * You may obtain a copy of the License at:
5  *
6  *     http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "command-line.h"
22 #include "daemon.h"
23 #include "dirs.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
26 #include "hash.h"
27 #include "hmap.h"
28 #include "json.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
33 #include "smap.h"
34 #include "stream.h"
35 #include "stream-ssl.h"
36 #include "unixctl.h"
37 #include "util.h"
38 #include "uuid.h"
39 #include "openvswitch/vlog.h"
40
41 VLOG_DEFINE_THIS_MODULE(ovn_northd);
42
43 static unixctl_cb_func ovn_northd_exit;
44
45 struct northd_context {
46     struct ovsdb_idl *ovnnb_idl;
47     struct ovsdb_idl *ovnsb_idl;
48     struct ovsdb_idl_txn *ovnnb_txn;
49     struct ovsdb_idl_txn *ovnsb_txn;
50 };
51
52 static const char *ovnnb_db;
53 static const char *ovnsb_db;
54
55 static const char *default_db(void);
56 \f
57 /* Pipeline stages. */
58
59 /* The two pipelines in an OVN logical flow table. */
60 enum ovn_pipeline {
61     P_IN,                       /* Ingress pipeline. */
62     P_OUT                       /* Egress pipeline. */
63 };
64
65 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
66 enum ovn_datapath_type {
67     DP_SWITCH,                  /* OVN logical switch. */
68     DP_ROUTER                   /* OVN logical router. */
69 };
70
71 /* Returns an "enum ovn_stage" built from the arguments.
72  *
73  * (It's better to use ovn_stage_build() for type-safety reasons, but inline
74  * functions can't be used in enums or switch cases.) */
75 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
76     (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
77
78 /* A stage within an OVN logical switch or router.
79  *
80  * An "enum ovn_stage" indicates whether the stage is part of a logical switch
81  * or router, whether the stage is part of the ingress or egress pipeline, and
82  * the table within that pipeline.  The first three components are combined to
83  * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
84  * S_ROUTER_OUT_DELIVERY. */
85 enum ovn_stage {
86 #define PIPELINE_STAGES                                                 \
87     /* Logical switch ingress stages. */                                \
88     PIPELINE_STAGE(SWITCH, IN,  PORT_SEC,    0, "switch_in_port_sec")   \
89     PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,     1, "switch_in_pre_acl")    \
90     PIPELINE_STAGE(SWITCH, IN,  ACL,         2, "switch_in_acl")        \
91     PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,     3, "switch_in_l2_lkup")    \
92                                                                         \
93     /* Logical switch egress stages. */                                 \
94     PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,     0, "switch_out_pre_acl")   \
95     PIPELINE_STAGE(SWITCH, OUT, ACL,         1, "switch_out_acl")       \
96     PIPELINE_STAGE(SWITCH, OUT, PORT_SEC,    2, "switch_out_port_sec")  \
97                                                                         \
98     /* Logical router ingress stages. */                                \
99     PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "router_in_admission")  \
100     PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "router_in_ip_input")   \
101     PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  2, "router_in_ip_routing") \
102     PIPELINE_STAGE(ROUTER, IN,  ARP,         3, "router_in_arp")        \
103                                                                         \
104     /* Logical router egress stages. */                                 \
105     PIPELINE_STAGE(ROUTER, OUT, DELIVERY,    0, "router_out_delivery")
106
107 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
108     S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
109         = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
110     PIPELINE_STAGES
111 #undef PIPELINE_STAGE
112 };
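
/* Illustration of the encoding above: DP_SWITCH == 0, DP_ROUTER == 1,
 * P_IN == 0 and P_OUT == 1, so for example
 * S_SWITCH_OUT_ACL == OVN_STAGE_BUILD(DP_SWITCH, P_OUT, 1)
 *                  == (0 << 9) | (1 << 8) | 1 == 0x101,
 * from which ovn_stage_get_pipeline() recovers P_OUT from bit 8 and
 * ovn_stage_get_table() recovers table 1 from the low byte. */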
113
114 /* Returns an "enum ovn_stage" built from the arguments. */
115 static enum ovn_stage
116 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
117                 uint8_t table)
118 {
119     return OVN_STAGE_BUILD(dp_type, pipeline, table);
120 }
121
122 /* Returns the pipeline to which 'stage' belongs. */
123 static enum ovn_pipeline
124 ovn_stage_get_pipeline(enum ovn_stage stage)
125 {
126     return (stage >> 8) & 1;
127 }
128
129 /* Returns the table to which 'stage' belongs. */
130 static uint8_t
131 ovn_stage_get_table(enum ovn_stage stage)
132 {
133     return stage & 0xff;
134 }
135
136 /* Returns a string name for 'stage'. */
137 static const char *
138 ovn_stage_to_str(enum ovn_stage stage)
139 {
140     switch (stage) {
141 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)       \
142         case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
143     PIPELINE_STAGES
144 #undef PIPELINE_STAGE
145         default: return "<unknown>";
146     }
147 }
148 \f
149 static void
150 usage(void)
151 {
152     printf("\
153 %s: OVN northbound management daemon\n\
154 usage: %s [OPTIONS]\n\
155 \n\
156 Options:\n\
157   --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
158                             (default: %s)\n\
159   --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
160                             (default: %s)\n\
161   -h, --help                display this help message\n\
162   -o, --options             list available options\n\
163   -V, --version             display version information\n\
164 ", program_name, program_name, default_db(), default_db());
165     daemon_usage();
166     vlog_usage();
167     stream_usage("database", true, true, false);
168 }
169 \f
170 struct tnlid_node {
171     struct hmap_node hmap_node;
172     uint32_t tnlid;
173 };
174
175 static void
176 destroy_tnlids(struct hmap *tnlids)
177 {
178     struct tnlid_node *node, *next;
179     HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
180         hmap_remove(tnlids, &node->hmap_node);
181         free(node);
182     }
183     hmap_destroy(tnlids);
184 }
185
186 static void
187 add_tnlid(struct hmap *set, uint32_t tnlid)
188 {
189     struct tnlid_node *node = xmalloc(sizeof *node);
190     hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
191     node->tnlid = tnlid;
192 }
193
194 static bool
195 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
196 {
197     const struct tnlid_node *node;
198     HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
199         if (node->tnlid == tnlid) {
200             return true;
201         }
202     }
203     return false;
204 }
205
206 static uint32_t
207 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
208                uint32_t *hint)
209 {
210     for (uint32_t tnlid = *hint + 1; tnlid != *hint;
211          tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
212         if (!tnlid_in_use(set, tnlid)) {
213             add_tnlid(set, tnlid);
214             *hint = tnlid;
215             return tnlid;
216         }
217     }
218
219     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
220     VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
221     return 0;
222 }
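
/* For example, starting from a zeroed hint, successive allocate_tnlid() calls
 * hand out 1, 2, 3, ... up to 'max'; each search begins just past the previous
 * allocation, wraps from 'max' back around to 1, and reports exhaustion
 * (returning 0) once it comes back around to the hint with every id in
 * between already in use. */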
223 \f
224 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
225  * sb->external_ids:logical-switch. */
226 struct ovn_datapath {
227     struct hmap_node key_node;  /* Index on 'key'. */
228     struct uuid key;            /* (nbs/nbr)->header_.uuid. */
229
230     const struct nbrec_logical_switch *nbs;  /* May be NULL. */
231     const struct nbrec_logical_router *nbr;  /* May be NULL. */
232     const struct sbrec_datapath_binding *sb; /* May be NULL. */
233
234     struct ovs_list list;       /* In list of similar records. */
235
236     /* Logical router data (digested from nbr). */
237     ovs_be32 gateway;
238
239     /* Logical switch data. */
240     struct ovn_port **router_ports;
241     size_t n_router_ports;
242
243     struct hmap port_tnlids;
244     uint32_t port_key_hint;
245
246     bool has_unknown;
247 };
248
249 static struct ovn_datapath *
250 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
251                     const struct nbrec_logical_switch *nbs,
252                     const struct nbrec_logical_router *nbr,
253                     const struct sbrec_datapath_binding *sb)
254 {
255     struct ovn_datapath *od = xzalloc(sizeof *od);
256     od->key = *key;
257     od->sb = sb;
258     od->nbs = nbs;
259     od->nbr = nbr;
260     hmap_init(&od->port_tnlids);
261     od->port_key_hint = 0;
262     hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
263     return od;
264 }
265
266 static void
267 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
268 {
269     if (od) {
270         /* Don't remove od->list.  It is used within build_datapaths() as a
271          * private list and once we've exited that function it is not safe to
272          * use it. */
273         hmap_remove(datapaths, &od->key_node);
274         destroy_tnlids(&od->port_tnlids);
275         free(od->router_ports);
276         free(od);
277     }
278 }
279
280 static struct ovn_datapath *
281 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
282 {
283     struct ovn_datapath *od;
284
285     HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
286         if (uuid_equals(uuid, &od->key)) {
287             return od;
288         }
289     }
290     return NULL;
291 }
292
293 static struct ovn_datapath *
294 ovn_datapath_from_sbrec(struct hmap *datapaths,
295                         const struct sbrec_datapath_binding *sb)
296 {
297     struct uuid key;
298
299     if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
300         !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
301         return NULL;
302     }
303     return ovn_datapath_find(datapaths, &key);
304 }
305
306 static void
307 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
308                struct ovs_list *sb_only, struct ovs_list *nb_only,
309                struct ovs_list *both)
310 {
311     hmap_init(datapaths);
312     list_init(sb_only);
313     list_init(nb_only);
314     list_init(both);
315
316     const struct sbrec_datapath_binding *sb, *sb_next;
317     SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
318         struct uuid key;
319         if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
320             !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
321             ovsdb_idl_txn_add_comment(
322                 ctx->ovnsb_txn,
323                 "deleting Datapath_Binding "UUID_FMT" that lacks "
324                 "external-ids:logical-switch and "
325                 "external-ids:logical-router",
326                 UUID_ARGS(&sb->header_.uuid));
327             sbrec_datapath_binding_delete(sb);
328             continue;
329         }
330
331         if (ovn_datapath_find(datapaths, &key)) {
332             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
333             VLOG_INFO_RL(
334                 &rl, "deleting Datapath_Binding "UUID_FMT" with "
335                 "duplicate external-ids:logical-switch/router "UUID_FMT,
336                 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
337             sbrec_datapath_binding_delete(sb);
338             continue;
339         }
340
341         struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
342                                                       NULL, NULL, sb);
343         list_push_back(sb_only, &od->list);
344     }
345
346     const struct nbrec_logical_switch *nbs;
347     NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
348         struct ovn_datapath *od = ovn_datapath_find(datapaths,
349                                                     &nbs->header_.uuid);
350         if (od) {
351             od->nbs = nbs;
352             list_remove(&od->list);
353             list_push_back(both, &od->list);
354         } else {
355             od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
356                                      nbs, NULL, NULL);
357             list_push_back(nb_only, &od->list);
358         }
359     }
360
361     const struct nbrec_logical_router *nbr;
362     NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
363         struct ovn_datapath *od = ovn_datapath_find(datapaths,
364                                                     &nbr->header_.uuid);
365         if (od) {
366             if (!od->nbs) {
367                 od->nbr = nbr;
368                 list_remove(&od->list);
369                 list_push_back(both, &od->list);
370             } else {
371                 /* Can't happen! */
372                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
373                 VLOG_WARN_RL(&rl,
374                              "duplicate UUID "UUID_FMT" in OVN_Northbound",
375                              UUID_ARGS(&nbr->header_.uuid));
376                 continue;
377             }
378         } else {
379             od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
380                                      NULL, nbr, NULL);
381             list_push_back(nb_only, &od->list);
382         }
383
384         od->gateway = 0;
385         if (nbr->default_gw) {
386             ovs_be32 ip, mask;
387             char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
388             if (error || !ip || mask != OVS_BE32_MAX) {
389                 static struct vlog_rate_limit rl
390                     = VLOG_RATE_LIMIT_INIT(5, 1);
391                 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
392                 free(error);
393             } else {
394                 od->gateway = ip;
395             }
396         }
397     }
398 }
399
400 static uint32_t
401 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
402 {
403     static uint32_t hint;
404     return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
405 }
406
407 static void
408 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
409 {
410     struct ovs_list sb_only, nb_only, both;
411
412     join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
413
414     if (!list_is_empty(&nb_only)) {
415         /* First index the in-use datapath tunnel IDs. */
416         struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
417         struct ovn_datapath *od;
418         LIST_FOR_EACH (od, list, &both) {
419             add_tnlid(&dp_tnlids, od->sb->tunnel_key);
420         }
421
422         /* Add southbound record for each unmatched northbound record. */
423         LIST_FOR_EACH (od, list, &nb_only) {
424             uint32_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
425             if (!tunnel_key) {
426                 break;
427             }
428
429             od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
430
431             char uuid_s[UUID_LEN + 1];
432             sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
433             const char *key = od->nbs ? "logical-switch" : "logical-router";
434             const struct smap id = SMAP_CONST1(&id, key, uuid_s);
435             sbrec_datapath_binding_set_external_ids(od->sb, &id);
436
437             sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
438         }
439         destroy_tnlids(&dp_tnlids);
440     }
441
442     /* Delete southbound records without northbound matches. */
443     struct ovn_datapath *od, *next;
444     LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
445         list_remove(&od->list);
446         sbrec_datapath_binding_delete(od->sb);
447         ovn_datapath_destroy(datapaths, od);
448     }
449 }
450 \f
451 struct ovn_port {
452     struct hmap_node key_node;  /* Index on 'key'. */
453     char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
454     char *json_key;             /* 'key', quoted for use in JSON. */
455
456     const struct nbrec_logical_port *nbs;        /* May be NULL. */
457     const struct nbrec_logical_router_port *nbr; /* May be NULL. */
458     const struct sbrec_port_binding *sb;         /* May be NULL. */
459
460     /* Logical router port data. */
461     ovs_be32 ip, mask;          /* 192.168.10.123/24. */
462     ovs_be32 network;           /* 192.168.10.0. */
463     ovs_be32 bcast;             /* 192.168.10.255. */
464     struct eth_addr mac;
465     struct ovn_port *peer;
466
467     struct ovn_datapath *od;
468
469     struct ovs_list list;       /* In list of similar records. */
470 };
471
472 static struct ovn_port *
473 ovn_port_create(struct hmap *ports, const char *key,
474                 const struct nbrec_logical_port *nbs,
475                 const struct nbrec_logical_router_port *nbr,
476                 const struct sbrec_port_binding *sb)
477 {
478     struct ovn_port *op = xzalloc(sizeof *op);
479
480     struct ds json_key = DS_EMPTY_INITIALIZER;
481     json_string_escape(key, &json_key);
482     op->json_key = ds_steal_cstr(&json_key);
483
484     op->key = xstrdup(key);
485     op->sb = sb;
486     op->nbs = nbs;
487     op->nbr = nbr;
488     hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
489     return op;
490 }
491
492 static void
493 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
494 {
495     if (port) {
496         /* Don't remove port->list.  It is used within build_ports() as a
497          * private list and once we've exited that function it is not safe to
498          * use it. */
499         hmap_remove(ports, &port->key_node);
500         free(port->json_key);
501         free(port->key);
502         free(port);
503     }
504 }
505
506 static struct ovn_port *
507 ovn_port_find(struct hmap *ports, const char *name)
508 {
509     struct ovn_port *op;
510
511     HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
512         if (!strcmp(op->key, name)) {
513             return op;
514         }
515     }
516     return NULL;
517 }
518
519 static uint32_t
520 ovn_port_allocate_key(struct ovn_datapath *od)
521 {
522     return allocate_tnlid(&od->port_tnlids, "port",
523                           (1u << 15) - 1, &od->port_key_hint);
524 }
525
526 static void
527 join_logical_ports(struct northd_context *ctx,
528                    struct hmap *datapaths, struct hmap *ports,
529                    struct ovs_list *sb_only, struct ovs_list *nb_only,
530                    struct ovs_list *both)
531 {
532     hmap_init(ports);
533     list_init(sb_only);
534     list_init(nb_only);
535     list_init(both);
536
537     const struct sbrec_port_binding *sb;
538     SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
539         struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
540                                               NULL, NULL, sb);
541         list_push_back(sb_only, &op->list);
542     }
543
544     struct ovn_datapath *od;
545     HMAP_FOR_EACH (od, key_node, datapaths) {
546         if (od->nbs) {
547             for (size_t i = 0; i < od->nbs->n_ports; i++) {
548                 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
549                 struct ovn_port *op = ovn_port_find(ports, nbs->name);
550                 if (op) {
551                     if (op->nbs || op->nbr) {
552                         static struct vlog_rate_limit rl
553                             = VLOG_RATE_LIMIT_INIT(5, 1);
554                         VLOG_WARN_RL(&rl, "duplicate logical port %s",
555                                      nbs->name);
556                         continue;
557                     }
558                     op->nbs = nbs;
559                     list_remove(&op->list);
560                     list_push_back(both, &op->list);
561                 } else {
562                     op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
563                     list_push_back(nb_only, &op->list);
564                 }
565
566                 op->od = od;
567             }
568         } else {
569             for (size_t i = 0; i < od->nbr->n_ports; i++) {
570                 const struct nbrec_logical_router_port *nbr
571                     = od->nbr->ports[i];
572
573                 struct eth_addr mac;
574                 if (!eth_addr_from_string(nbr->mac, &mac)) {
575                     static struct vlog_rate_limit rl
576                         = VLOG_RATE_LIMIT_INIT(5, 1);
577                     VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
578                     continue;
579                 }
580
581                 ovs_be32 ip, mask;
582                 char *error = ip_parse_masked(nbr->network, &ip, &mask);
583                 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
584                     static struct vlog_rate_limit rl
585                         = VLOG_RATE_LIMIT_INIT(5, 1);
586                     VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
587                     free(error);
588                     continue;
589                 }
590
591                 char name[UUID_LEN + 1];
592                 snprintf(name, sizeof name, UUID_FMT,
593                          UUID_ARGS(&nbr->header_.uuid));
594                 struct ovn_port *op = ovn_port_find(ports, name);
595                 if (op) {
596                     if (op->nbs || op->nbr) {
597                         static struct vlog_rate_limit rl
598                             = VLOG_RATE_LIMIT_INIT(5, 1);
599                         VLOG_WARN_RL(&rl, "duplicate logical router port %s",
600                                      name);
601                         continue;
602                     }
603                     op->nbr = nbr;
604                     list_remove(&op->list);
605                     list_push_back(both, &op->list);
606                 } else {
607                     op = ovn_port_create(ports, name, NULL, nbr, NULL);
608                     list_push_back(nb_only, &op->list);
609                 }
610
611                 op->ip = ip;
612                 op->mask = mask;
613                 op->network = ip & mask;
614                 op->bcast = ip | ~mask;
615                 op->mac = mac;
616
617                 op->od = od;
618             }
619         }
620     }
621
622     /* Connect logical router ports, and logical switch ports of type "router",
623      * to their peers. */
624     struct ovn_port *op;
625     HMAP_FOR_EACH (op, key_node, ports) {
626         if (op->nbs && !strcmp(op->nbs->type, "router")) {
627             const char *peer_name = smap_get(&op->nbs->options, "router-port");
628             if (!peer_name) {
629                 continue;
630             }
631
632             struct ovn_port *peer = ovn_port_find(ports, peer_name);
633             if (!peer || !peer->nbr) {
634                 continue;
635             }
636
637             peer->peer = op;
638             op->peer = peer;
639             op->od->router_ports = xrealloc(
640                 op->od->router_ports,
641                 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
642             op->od->router_ports[op->od->n_router_ports++] = op;
643         } else if (op->nbr && op->nbr->peer) {
644             char peer_name[UUID_LEN + 1];
645             snprintf(peer_name, sizeof peer_name, UUID_FMT,
646                      UUID_ARGS(&op->nbr->peer->header_.uuid));
647             op->peer = ovn_port_find(ports, peer_name);
648         }
649     }
650 }
651
652 static void
653 ovn_port_update_sbrec(const struct ovn_port *op)
654 {
655     sbrec_port_binding_set_datapath(op->sb, op->od->sb);
656     if (op->nbr) {
657         sbrec_port_binding_set_type(op->sb, "patch");
658
659         const char *peer = op->peer ? op->peer->key : "<error>";
660         const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
661         sbrec_port_binding_set_options(op->sb, &ids);
662
663         sbrec_port_binding_set_parent_port(op->sb, NULL);
664         sbrec_port_binding_set_tag(op->sb, NULL, 0);
665         sbrec_port_binding_set_mac(op->sb, NULL, 0);
666     } else {
667         if (strcmp(op->nbs->type, "router")) {
668             sbrec_port_binding_set_type(op->sb, op->nbs->type);
669             sbrec_port_binding_set_options(op->sb, &op->nbs->options);
670         } else {
671             sbrec_port_binding_set_type(op->sb, "patch");
672
673             const char *router_port = smap_get(&op->nbs->options,
674                                                "router-port");
675             if (!router_port) {
676                 router_port = "<error>";
677             }
678             const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
679             sbrec_port_binding_set_options(op->sb, &ids);
680         }
681         sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
682         sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
683         sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
684                                    op->nbs->n_addresses);
685     }
686 }
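
/* For example, a logical switch port of type "router" becomes a southbound
 * "patch" Port_Binding whose options:peer carries the value of its
 * options:router-port, while the logical router port on the other side
 * becomes a "patch" Port_Binding whose options:peer is the switch port's
 * name, so each side of the connection names its peer. */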
687
688 static void
689 build_ports(struct northd_context *ctx, struct hmap *datapaths,
690             struct hmap *ports)
691 {
692     struct ovs_list sb_only, nb_only, both;
693
694     join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
695
696     /* For logical ports that are in both databases, update the southbound
697      * record based on northbound data.  Also index the in-use tunnel_keys. */
698     struct ovn_port *op, *next;
699     LIST_FOR_EACH_SAFE (op, next, list, &both) {
700         ovn_port_update_sbrec(op);
701
702         add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
703         if (op->sb->tunnel_key > op->od->port_key_hint) {
704             op->od->port_key_hint = op->sb->tunnel_key;
705         }
706     }
707
708     /* Add southbound record for each unmatched northbound record. */
709     LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
710         uint16_t tunnel_key = ovn_port_allocate_key(op->od);
711         if (!tunnel_key) {
712             continue;
713         }
714
715         op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
716         ovn_port_update_sbrec(op);
717
718         sbrec_port_binding_set_logical_port(op->sb, op->key);
719         sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
720     }
721
722     /* Delete southbound records without northbound matches. */
723     LIST_FOR_EACH_SAFE (op, next, list, &sb_only) {
724         list_remove(&op->list);
725         sbrec_port_binding_delete(op->sb);
726         ovn_port_destroy(ports, op);
727     }
728 }
729 \f
730 #define OVN_MIN_MULTICAST 32768
731 #define OVN_MAX_MULTICAST 65535
732
733 struct multicast_group {
734     const char *name;
735     uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
736 };
737
738 #define MC_FLOOD "_MC_flood"
739 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
740
741 #define MC_UNKNOWN "_MC_unknown"
742 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
743
744 static bool
745 multicast_group_equal(const struct multicast_group *a,
746                       const struct multicast_group *b)
747 {
748     return !strcmp(a->name, b->name) && a->key == b->key;
749 }
750
751 /* Multicast group entry. */
752 struct ovn_multicast {
753     struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
754     struct ovn_datapath *datapath;
755     const struct multicast_group *group;
756
757     struct ovn_port **ports;
758     size_t n_ports, allocated_ports;
759 };
760
761 static uint32_t
762 ovn_multicast_hash(const struct ovn_datapath *datapath,
763                    const struct multicast_group *group)
764 {
765     return hash_pointer(datapath, group->key);
766 }
767
768 static struct ovn_multicast *
769 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
770                    const struct multicast_group *group)
771 {
772     struct ovn_multicast *mc;
773
774     HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
775                              ovn_multicast_hash(datapath, group), mcgroups) {
776         if (mc->datapath == datapath
777             && multicast_group_equal(mc->group, group)) {
778             return mc;
779         }
780     }
781     return NULL;
782 }
783
784 static void
785 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
786                   struct ovn_port *port)
787 {
788     struct ovn_datapath *od = port->od;
789     struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
790     if (!mc) {
791         mc = xmalloc(sizeof *mc);
792         hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
793         mc->datapath = od;
794         mc->group = group;
795         mc->n_ports = 0;
796         mc->allocated_ports = 4;
797         mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
798     }
799     if (mc->n_ports >= mc->allocated_ports) {
800         mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
801                                sizeof *mc->ports);
802     }
803     mc->ports[mc->n_ports++] = port;
804 }
805
806 static void
807 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
808 {
809     if (mc) {
810         hmap_remove(mcgroups, &mc->hmap_node);
811         free(mc->ports);
812         free(mc);
813     }
814 }
815
816 static void
817 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
818                            const struct sbrec_multicast_group *sb)
819 {
820     struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
821     for (size_t i = 0; i < mc->n_ports; i++) {
822         ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
823     }
824     sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
825     free(ports);
826 }
827 \f
828 /* Logical flow generation.
829  *
830  * This code generates the Logical_Flow table in the southbound database, as a
831  * function of most of the northbound database.
832  */
833
834 struct ovn_lflow {
835     struct hmap_node hmap_node;
836
837     struct ovn_datapath *od;
838     enum ovn_stage stage;
839     uint16_t priority;
840     char *match;
841     char *actions;
842 };
843
844 static size_t
845 ovn_lflow_hash(const struct ovn_lflow *lflow)
846 {
847     size_t hash = uuid_hash(&lflow->od->key);
848     hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
849     hash = hash_string(lflow->match, hash);
850     return hash_string(lflow->actions, hash);
851 }
852
853 static bool
854 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
855 {
856     return (a->od == b->od
857             && a->stage == b->stage
858             && a->priority == b->priority
859             && !strcmp(a->match, b->match)
860             && !strcmp(a->actions, b->actions));
861 }
862
863 static void
864 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
865               enum ovn_stage stage, uint16_t priority,
866               char *match, char *actions)
867 {
868     lflow->od = od;
869     lflow->stage = stage;
870     lflow->priority = priority;
871     lflow->match = match;
872     lflow->actions = actions;
873 }
874
875 /* Adds a row with the specified contents to the Logical_Flow table. */
876 static void
877 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
878               enum ovn_stage stage, uint16_t priority,
879               const char *match, const char *actions)
880 {
881     struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
882     ovn_lflow_init(lflow, od, stage, priority,
883                    xstrdup(match), xstrdup(actions));
884     hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
885 }
886
887 static struct ovn_lflow *
888 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
889                enum ovn_stage stage, uint16_t priority,
890                const char *match, const char *actions)
891 {
892     struct ovn_lflow target;
893     ovn_lflow_init(&target, od, stage, priority,
894                    CONST_CAST(char *, match), CONST_CAST(char *, actions));
895
896     struct ovn_lflow *lflow;
897     HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
898                              lflows) {
899         if (ovn_lflow_equal(lflow, &target)) {
900             return lflow;
901         }
902     }
903     return NULL;
904 }
905
906 static void
907 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
908 {
909     if (lflow) {
910         hmap_remove(lflows, &lflow->hmap_node);
911         free(lflow->match);
912         free(lflow->actions);
913         free(lflow);
914     }
915 }
916
917 /* Appends port security constraints on L2 address field 'eth_addr_field'
918  * (e.g. "eth.src" or "eth.dst") to 'match'.  'port_security', with
919  * 'n_port_security' elements, is the collection of port_security constraints
920  * from an OVN_NB Logical_Port row. */
921 static void
922 build_port_security(const char *eth_addr_field,
923                     char **port_security, size_t n_port_security,
924                     struct ds *match)
925 {
926     size_t base_len = match->length;
927     ds_put_format(match, " && %s == {", eth_addr_field);
928
929     size_t n = 0;
930     for (size_t i = 0; i < n_port_security; i++) {
931         struct eth_addr ea;
932
933         if (eth_addr_from_string(port_security[i], &ea)) {
934             ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
935             ds_put_char(match, ' ');
936             n++;
937         }
938     }
939     ds_chomp(match, ' ');
940     ds_put_cstr(match, "}");
941
942     if (!n) {
943         match->length = base_len;
944     }
945 }
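
/* For example, if 'match' already holds inport == "lp1" (an arbitrary example
 * port name) and the port_security column lists "00:00:00:00:00:01" and
 * "00:00:00:00:00:02", the result is
 *   inport == "lp1" && eth.src == {00:00:00:00:00:01 00:00:00:00:00:02}
 * whereas if no entry parses as an Ethernet address, 'match' is rolled back
 * to its original length and left unchanged. */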
946
947 static bool
948 lport_is_enabled(const struct nbrec_logical_port *lport)
949 {
950     return !lport->enabled || *lport->enabled;
951 }
952
953 static bool
954 has_stateful_acl(struct ovn_datapath *od)
955 {
956     for (size_t i = 0; i < od->nbs->n_acls; i++) {
957         struct nbrec_acl *acl = od->nbs->acls[i];
958         if (!strcmp(acl->action, "allow-related")) {
959             return true;
960         }
961     }
962
963     return false;
964 }
965
966 static void
967 build_acls(struct ovn_datapath *od, struct hmap *lflows)
968 {
969     bool has_stateful = has_stateful_acl(od);
970
971     /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
972      * allowed by default. */
973     ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
974     ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
975
976     /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
977      * default.  A related rule at priority 1 is added below if there
978      * are any stateful ACLs in this datapath. */
979     ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
980     ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
981
982     /* If there are any stateful ACL rules in this datapath, we must
983      * send all IP packets through the conntrack action, which handles
984      * defragmentation, in order to match L4 headers. */
985     if (has_stateful) {
986         /* Ingress and Egress Pre-ACL Table (Priority 100).
987          *
988          * Regardless of whether the ACL is "from-lport" or "to-lport",
989          * we need rules in both the ingress and egress table, because
990          * the return traffic needs to be followed. */
991         ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
992         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
993
994         /* Ingress and Egress ACL Table (Priority 1).
995          *
996          * By default, traffic is allowed.  This is partially handled by
997          * the Priority 0 ACL flows added earlier, but we also need to
998          * commit IP flows.  This is because, while the initiator's
999          * direction may not have any stateful rules, the server's may
1000          * and then its return traffic would not have an associated
1001          * conntrack entry and would return "+invalid". */
1002         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1003                       "ct_commit; next;");
1004         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1005                       "ct_commit; next;");
1006
1007         /* Ingress and Egress ACL Table (Priority 65535).
1008          *
1009          * Always drop traffic that's in an invalid state.  This is
1010          * enforced at a higher priority than ACLs can be defined. */
1011         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1012                       "ct.inv", "drop;");
1013         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1014                       "ct.inv", "drop;");
1015
1016         /* Ingress and Egress ACL Table (Priority 65535).
1017          *
1018          * Always allow traffic that is established to a committed
1019          * conntrack entry.  This is enforced at a higher priority than
1020          * ACLs can be defined. */
1021         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1022                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1023                       "next;");
1024         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1025                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1026                       "next;");
1027
1028         /* Ingress and Egress ACL Table (Priority 65535).
1029          *
1030          * Always allow traffic that is related to an existing conntrack
1031          * entry.  This is enforced at a higher priority than ACLs can
1032          * be defined.
1033          *
1034          * NOTE: This does not support related data sessions (eg,
1035          * a dynamically negotiated FTP data channel), but will allow
1036          * related traffic such as an ICMP Port Unreachable through
1037          * that's generated from a non-listening UDP port.  */
1038         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1039                       "!ct.est && ct.rel && !ct.new && !ct.inv",
1040                       "next;");
1041         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1042                       "!ct.est && ct.rel && !ct.new && !ct.inv",
1043                       "next;");
1044     }
1045
1046     /* Ingress or Egress ACL Table (Various priorities). */
1047     for (size_t i = 0; i < od->nbs->n_acls; i++) {
1048         struct nbrec_acl *acl = od->nbs->acls[i];
1049         bool ingress = !strcmp(acl->direction, "from-lport");
1050         enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1051
1052         if (!strcmp(acl->action, "allow")) {
1053             /* If there are any stateful flows, we must even commit "allow"
1054              * actions.  This is because, while the initiator's
1055              * direction may not have any stateful rules, the server's
1056              * may and then its return traffic would not have an
1057              * associated conntrack entry and would return "+invalid". */
1058             const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1059             ovn_lflow_add(lflows, od, stage, acl->priority,
1060                           acl->match, actions);
1061         } else if (!strcmp(acl->action, "allow-related")) {
1062             struct ds match = DS_EMPTY_INITIALIZER;
1063
1064             /* Commit the connection tracking entry, which allows all
1065              * other traffic related to this entry to flow due to the
1066              * 65535 priority flow defined earlier. */
1067             ds_put_format(&match, "ct.new && (%s)", acl->match);
1068             ovn_lflow_add(lflows, od, stage, acl->priority,
1069                           ds_cstr(&match), "ct_commit; next;");
1070
1071             ds_destroy(&match);
1072         } else if (!strcmp(acl->action, "drop")) {
1073             ovn_lflow_add(lflows, od, stage, acl->priority,
1074                           acl->match, "drop;");
1075         } else if (!strcmp(acl->action, "reject")) {
1076             /* xxx Need to support "reject". */
1077             VLOG_INFO("reject is not a supported action");
1078             ovn_lflow_add(lflows, od, stage, acl->priority,
1079                           acl->match, "drop;");
1080         }
1081     }
1082 }
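
/* For example, a northbound ACL with direction "from-lport", priority 1000,
 * match "tcp.dst == 80" (example values) and action "allow-related" produces
 * the ingress ACL flow
 *   priority 1000, match "ct.new && (tcp.dst == 80)", actions "ct_commit; next;"
 * and, because the datapath then has a stateful ACL, also the fixed flows
 * above: "ct_next;" for "ip" in both pre-ACL tables plus the priority-65535
 * established/related/invalid handling in both ACL tables. */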
1083
1084 static void
1085 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1086                     struct hmap *lflows, struct hmap *mcgroups)
1087 {
1088     /* This flow table structure is documented in ovn-northd(8), so please
1089      * update ovn-northd.8.xml if you change anything. */
1090
1091     /* Build pre-ACL and ACL tables for both ingress and egress.
1092      * Ingress tables 1 and 2.  Egress tables 0 and 1. */
1093     struct ovn_datapath *od;
1094     HMAP_FOR_EACH (od, key_node, datapaths) {
1095         if (!od->nbs) {
1096             continue;
1097         }
1098
1099         build_acls(od, lflows);
1100     }
1101
1102     /* Logical switch ingress table 0: Admission control framework (priority
1103      * 100). */
1104     HMAP_FOR_EACH (od, key_node, datapaths) {
1105         if (!od->nbs) {
1106             continue;
1107         }
1108
1109         /* Logical VLANs not supported. */
1110         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
1111                       "drop;");
1112
1113         /* Broadcast/multicast source address is invalid. */
1114         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
1115                       "drop;");
1116
1117         /* Port security flows have priority 50 (see below) and will continue
1118          * to the next table if packet source is acceptable. */
1119     }
1120
1121     /* Logical switch ingress table 0: Ingress port security (priority 50). */
1122     struct ovn_port *op;
1123     HMAP_FOR_EACH (op, key_node, ports) {
1124         if (!op->nbs) {
1125             continue;
1126         }
1127
1128         if (!lport_is_enabled(op->nbs)) {
1129             /* Drop packets from disabled logical ports (since logical flow
1130              * tables are default-drop). */
1131             continue;
1132         }
1133
1134         struct ds match = DS_EMPTY_INITIALIZER;
1135         ds_put_format(&match, "inport == %s", op->json_key);
1136         build_port_security("eth.src",
1137                             op->nbs->port_security, op->nbs->n_port_security,
1138                             &match);
1139         ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
1140                       ds_cstr(&match), "next;");
1141         ds_destroy(&match);
1142     }
1143
1144     /* Ingress table 3: Destination lookup, broadcast and multicast handling
1145      * (priority 100). */
1146     HMAP_FOR_EACH (op, key_node, ports) {
1147         if (!op->nbs) {
1148             continue;
1149         }
1150
1151         if (lport_is_enabled(op->nbs)) {
1152             ovn_multicast_add(mcgroups, &mc_flood, op);
1153         }
1154     }
1155     HMAP_FOR_EACH (od, key_node, datapaths) {
1156         if (!od->nbs) {
1157             continue;
1158         }
1159
1160         ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1161                       "outport = \""MC_FLOOD"\"; output;");
1162     }
1163
1164     /* Ingress table 3: Destination lookup, unicast handling (priority 50). */
1165     HMAP_FOR_EACH (op, key_node, ports) {
1166         if (!op->nbs) {
1167             continue;
1168         }
1169
1170         for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1171             struct eth_addr mac;
1172
1173             if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1174                 struct ds match, actions;
1175
1176                 ds_init(&match);
1177                 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1178                               ETH_ADDR_ARGS(mac));
1179
1180                 ds_init(&actions);
1181                 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1182                 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1183                               ds_cstr(&match), ds_cstr(&actions));
1184                 ds_destroy(&actions);
1185                 ds_destroy(&match);
1186             } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1187                 if (lport_is_enabled(op->nbs)) {
1188                     ovn_multicast_add(mcgroups, &mc_unknown, op);
1189                     op->od->has_unknown = true;
1190                 }
1191             } else {
1192                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1193
1194                 VLOG_INFO_RL(&rl,
1195                              "%s: invalid syntax '%s' in addresses column",
1196                              op->nbs->name, op->nbs->addresses[i]);
1197             }
1198         }
1199     }
1200
1201     /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
1202     HMAP_FOR_EACH (od, key_node, datapaths) {
1203         if (!od->nbs) {
1204             continue;
1205         }
1206
1207         if (od->has_unknown) {
1208             ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1209                           "outport = \""MC_UNKNOWN"\"; output;");
1210         }
1211     }
1212
1213     /* Egress table 2: Egress port security multicast/broadcast (priority
1214      * 100). */
1215     HMAP_FOR_EACH (od, key_node, datapaths) {
1216         if (!od->nbs) {
1217             continue;
1218         }
1219
1220         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
1221                       "output;");
1222     }
1223
1224     /* Egress table 2: Egress port security (priorities 50 and 150).
1225      *
1226      * Priority 50 rules implement port security for enabled logical port.
1227      *
1228      * Priority 150 rules drop packets to disabled logical ports, so that they
1229      * don't even receive multicast or broadcast packets. */
1230     HMAP_FOR_EACH (op, key_node, ports) {
1231         if (!op->nbs) {
1232             continue;
1233         }
1234
1235         struct ds match = DS_EMPTY_INITIALIZER;
1236         ds_put_format(&match, "outport == %s", op->json_key);
1237         if (lport_is_enabled(op->nbs)) {
1238             build_port_security("eth.dst", op->nbs->port_security,
1239                                 op->nbs->n_port_security, &match);
1240             ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
1241                           ds_cstr(&match), "output;");
1242         } else {
1243             ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
1244                           ds_cstr(&match), "drop;");
1245         }
1246
1247         ds_destroy(&match);
1248     }
1249 }
1250
1251 static bool
1252 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1253 {
1254     return !lrport->enabled || *lrport->enabled;
1255 }
1256
1257 static void
1258 add_route(struct hmap *lflows, struct ovn_datapath *od,
1259           ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1260 {
1261     char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1262                             IP_ARGS(network), IP_ARGS(mask));
1263
1264     struct ds actions = DS_EMPTY_INITIALIZER;
1265     ds_put_cstr(&actions, "ip4.ttl--; reg0 = ");
1266     if (gateway) {
1267         ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1268     } else {
1269         ds_put_cstr(&actions, "ip4.dst");
1270     }
1271     ds_put_cstr(&actions, "; next;");
1272
1273     /* The priority here is calculated to implement longest-prefix-match
1274      * routing. */
1275     ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
1276                   count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1277     ds_destroy(&actions);
1278     free(match);
1279 }
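
/* For example, a connected 192.168.10.0/24 route is installed at priority 24
 * while a default route toward od->gateway is installed at priority 0, so the
 * more specific /24 prefix always takes precedence, which yields
 * longest-prefix-match routing. */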
1280
1281 static void
1282 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1283                     struct hmap *lflows)
1284 {
1285     /* This flow table structure is documented in ovn-northd(8), so please
1286      * update ovn-northd.8.xml if you change anything. */
1287
1288     /* XXX ICMP echo reply */
1289
1290     /* Logical router ingress table 0: Admission control framework. */
1291     struct ovn_datapath *od;
1292     HMAP_FOR_EACH (od, key_node, datapaths) {
1293         if (!od->nbr) {
1294             continue;
1295         }
1296
1297         /* Logical VLANs not supported.
1298          * Broadcast/multicast source address is invalid. */
1299         ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1300                       "vlan.present || eth.src[40]", "drop;");
1301     }
1302
1303     /* Logical router ingress table 0: MAC-based admission (priority 50). */
1304     struct ovn_port *op;
1305     HMAP_FOR_EACH (op, key_node, ports) {
1306         if (!op->nbr) {
1307             continue;
1308         }
1309
1310         if (!lrport_is_enabled(op->nbr)) {
1311             /* Drop packets from disabled logical ports (since logical flow
1312              * tables are default-drop). */
1313             continue;
1314         }
1315
1316         char *match = xasprintf(
1317             "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1318             ETH_ADDR_ARGS(op->mac), op->json_key);
1319         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1320                       match, "next;");
1321     }
1322
1323     /* Logical router ingress table 1: IP Input. */
1324     HMAP_FOR_EACH (od, key_node, datapaths) {
1325         if (!od->nbr) {
1326             continue;
1327         }
1328
1329         /* L3 admission control: drop multicast and broadcast source, localhost
1330          * source or destination, and zero network source or destination
1331          * (priority 100). */
1332         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1333                       "ip4.mcast || "
1334                       "ip4.src == 255.255.255.255 || "
1335                       "ip4.src == 127.0.0.0/8 || "
1336                       "ip4.dst == 127.0.0.0/8 || "
1337                       "ip4.src == 0.0.0.0/8 || "
1338                       "ip4.dst == 0.0.0.0/8",
1339                       "drop;");
1340
1341         /* Drop Ethernet local broadcast.  By definition this traffic should
1342          * not be forwarded. */
1343         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1344                       "eth.bcast", "drop;");
1345
1346         /* Drop IP multicast. */
1347         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1348                       "ip4.mcast", "drop;");
1349
1350         /* TTL discard.
1351          *
1352          * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1353         char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1354         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1355         free(match);
1356
1357         /* Pass other traffic not already handled to the next table for
1358          * routing. */
1359         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
1360     }
1361
1362     HMAP_FOR_EACH (op, key_node, ports) {
1363         if (!op->nbr) {
1364             continue;
1365         }
1366
1367         /* L3 admission control: drop packets that originate from an IP address
1368          * owned by the router or a broadcast address known to the router
1369          * (priority 100). */
1370         char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1371                                 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1372         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1373                       match, "drop;");
1374         free(match);
1375
1376         /* ARP reply.  These flows reply to ARP requests for the router's own
1377          * IP address. */
1378         match = xasprintf(
1379             "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1380             op->json_key, IP_ARGS(op->ip));
1381         char *actions = xasprintf(
1382             "eth.dst = eth.src; "
1383             "eth.src = "ETH_ADDR_FMT"; "
1384             "arp.op = 2; /* ARP reply */ "
1385             "arp.tha = arp.sha; "
1386             "arp.sha = "ETH_ADDR_FMT"; "
1387             "arp.tpa = arp.spa; "
1388             "arp.spa = "IP_FMT"; "
1389             "outport = %s; "
1390             "inport = \"\"; /* Allow sending out inport. */ "
1391             "output;",
1392             ETH_ADDR_ARGS(op->mac),
1393             ETH_ADDR_ARGS(op->mac),
1394             IP_ARGS(op->ip),
1395             op->json_key);
1396         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1397                       match, actions);
1398
1399         /* Drop IP traffic to this router. */
1400         match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1401         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1402                       match, "drop;");
1403         free(match);
1404     }
1405
1406     /* Logical router ingress table 2: IP Routing.
1407      *
1408      * A packet that arrives at this table is an IP packet that should be
1409      * routed to the address in ip4.dst. This table sets reg0 to the next-hop
1410      * IP address (leaving ip4.dst, the packet's final destination, unchanged)
1411      * and advances to the next table for ARP resolution. */
1412     HMAP_FOR_EACH (op, key_node, ports) {
1413         if (!op->nbr) {
1414             continue;
1415         }
1416
1417         add_route(lflows, op->od, op->network, op->mask, 0);
1418     }
1419     HMAP_FOR_EACH (od, key_node, datapaths) {
1420         if (!od->nbr) {
1421             continue;
1422         }
1423
1424         if (od->gateway) {
1425             add_route(lflows, od, 0, 0, od->gateway);
1426         }
1427     }
1428     /* XXX destination unreachable */
1429
1430     /* Local router ingress table 3: ARP Resolution.
1431      *
1432      * Any packet that reaches this table is an IP packet whose next-hop IP
1433      * address is in reg0. (ip4.dst is the final destination.) This table
1434      * resolves the IP address in reg0 into an output port in outport and an
1435      * Ethernet address in eth.dst. */
1436     HMAP_FOR_EACH (op, key_node, ports) {
1437         if (op->nbr) {
1438             /* XXX ARP for neighboring router */
1439         } else if (op->od->n_router_ports) {
1440             for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1441                 struct eth_addr ea;
1442                 ovs_be32 ip;
1443
1444                 if (ovs_scan(op->nbs->addresses[i],
1445                              ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
1446                              ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
1447                     for (size_t j = 0; j < op->od->n_router_ports; j++) {
1448                         /* Get the Logical_Router_Port that the Logical_Port is
1449                          * connected to, as 'peer'. */
1450                         const char *peer_name = smap_get(
1451                             &op->od->router_ports[j]->nbs->options,
1452                             "router-port");
1453                         if (!peer_name) {
1454                             continue;
1455                         }
1456
1457                         struct ovn_port *peer
1458                             = ovn_port_find(ports, peer_name);
1459                         if (!peer || !peer->nbr) {
1460                             continue;
1461                         }
1462
1463                         /* Make sure that 'ip' is in 'peer''s network. */
1464                         if ((ip ^ peer->network) & peer->mask) {
1465                             continue;
1466                         }
1467
1468                         char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
1469                         char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
1470                                                   "eth.dst = "ETH_ADDR_FMT"; "
1471                                                   "outport = %s; "
1472                                                   "output;",
1473                                                   ETH_ADDR_ARGS(peer->mac),
1474                                                   ETH_ADDR_ARGS(ea),
1475                                                   peer->json_key);
1476                         ovn_lflow_add(lflows, peer->od,
1477                                       S_ROUTER_IN_ARP, 200, match, actions);
1478                         free(actions);
1479                         free(match);
1480                         break;
1481                     }
1482                 }
1483             }
1484         }
1485     }
1486
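         /* For example, with hypothetical values: if a switch port carries the
          * address "00:00:00:0a:0b:0c 192.168.1.5" and that switch also
          * connects to a router port "lrp1" with MAC 00:00:00:01:02:03 whose
          * network contains 192.168.1.5, the loop above installs, on the
          * router's datapath at priority 200,
          *
          *   match:   reg0 == 192.168.1.5
          *   actions: eth.src = 00:00:00:01:02:03; eth.dst = 00:00:00:0a:0b:0c;
          *            outport = "lrp1"; output;
          */
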
1487     /* Logical router egress table 0: Delivery (priority 100).
1488      *
1489      * Priority 100 rules deliver packets to enabled logical ports. */
1490     HMAP_FOR_EACH (op, key_node, ports) {
1491         if (!op->nbr) {
1492             continue;
1493         }
1494
1495         if (!lrport_is_enabled(op->nbr)) {
1496             /* Drop packets to disabled logical ports (since logical flow
1497              * tables are default-drop). */
1498             continue;
1499         }
1500
1501         char *match = xasprintf("outport == %s", op->json_key);
1502         ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
1503                       match, "output;");
1504         free(match);
1505     }
1506 }
1507
1508 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
1509  * constructing their contents based on the OVN_NB database: rows that still
      * match the newly computed flows are kept, stale rows are deleted, and
      * missing rows are inserted. */
1510 static void
1511 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
1512              struct hmap *ports)
1513 {
1514     struct hmap lflows = HMAP_INITIALIZER(&lflows);
1515     struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
1516
1517     build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
1518     build_lrouter_flows(datapaths, ports, &lflows);
1519
1520     /* Push changes to the Logical_Flow table to database. */
1521     const struct sbrec_logical_flow *sbflow, *next_sbflow;
1522     SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1523         struct ovn_datapath *od
1524             = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
1525         if (!od) {
1526             sbrec_logical_flow_delete(sbflow);
1527             continue;
1528         }
1529
1530         enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
1531         enum ovn_pipeline pipeline
1532             = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
1533         struct ovn_lflow *lflow = ovn_lflow_find(
1534             &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
1535             sbflow->priority, sbflow->match, sbflow->actions);
1536         if (lflow) {
1537             ovn_lflow_destroy(&lflows, lflow);
1538         } else {
1539             sbrec_logical_flow_delete(sbflow);
1540         }
1541     }
1542     struct ovn_lflow *lflow, *next_lflow;
1543     HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1544         enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
1545         uint8_t table = ovn_stage_get_table(lflow->stage);
1546
1547         sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1548         sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1549         sbrec_logical_flow_set_pipeline(
1550             sbflow, pipeline == P_IN ? "ingress" : "egress");
1551         sbrec_logical_flow_set_table_id(sbflow, table);
1552         sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1553         sbrec_logical_flow_set_match(sbflow, lflow->match);
1554         sbrec_logical_flow_set_actions(sbflow, lflow->actions);
1555
1556         const struct smap ids = SMAP_CONST1(&ids, "stage-name",
1557                                             ovn_stage_to_str(lflow->stage));
1558         sbrec_logical_flow_set_external_ids(sbflow, &ids);
1559
1560         ovn_lflow_destroy(&lflows, lflow);
1561     }
1562     hmap_destroy(&lflows);
1563
1564     /* Push changes to the Multicast_Group table to database. */
1565     const struct sbrec_multicast_group *sbmc, *next_sbmc;
1566     SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1567         struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1568                                                           sbmc->datapath);
1569         if (!od) {
1570             sbrec_multicast_group_delete(sbmc);
1571             continue;
1572         }
1573
1574         struct multicast_group group = { .name = sbmc->name,
1575                                          .key = sbmc->tunnel_key };
1576         struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
1577         if (mc) {
1578             ovn_multicast_update_sbrec(mc, sbmc);
1579             ovn_multicast_destroy(&mcgroups, mc);
1580         } else {
1581             sbrec_multicast_group_delete(sbmc);
1582         }
1583     }
1584     struct ovn_multicast *mc, *next_mc;
1585     HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1586         sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1587         sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1588         sbrec_multicast_group_set_name(sbmc, mc->group->name);
1589         sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1590         ovn_multicast_update_sbrec(mc, sbmc);
1591         ovn_multicast_destroy(&mcgroups, mc);
1592     }
1593     hmap_destroy(&mcgroups);
1594 }
1595 \f
1596 static void
1597 ovnnb_db_changed(struct northd_context *ctx)
1598 {
1599     VLOG_DBG("ovn-nb db contents have changed.");
1600
1601     struct hmap datapaths, ports;
1602     build_datapaths(ctx, &datapaths);
1603     build_ports(ctx, &datapaths, &ports);
1604     build_lflows(ctx, &datapaths, &ports);
1605
1606     struct ovn_datapath *dp, *next_dp;
1607     HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1608         ovn_datapath_destroy(&datapaths, dp);
1609     }
1610     hmap_destroy(&datapaths);
1611
1612     struct ovn_port *port, *next_port;
1613     HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1614         ovn_port_destroy(&ports, port);
1615     }
1616     hmap_destroy(&ports);
1617 }
1618
1619 /*
1620  * The only change we get notified about is a change to the 'chassis' column
1621  * of the 'Port_Binding' table.  When that column is non-empty the matching
1622  * logical port is marked 'up' in the northbound DB; when empty, marked down.
1623  */
1624 static void
1625 ovnsb_db_changed(struct northd_context *ctx)
1626 {
1627     struct hmap lports_hmap;
1628     const struct sbrec_port_binding *sb;
1629     const struct nbrec_logical_port *nb;
1630
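         /* Temporary index of the northbound logical ports, keyed by name, so
          * that each Port_Binding row below can find its Logical_Port without
          * a linear scan. */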
1631     struct lport_hash_node {
1632         struct hmap_node node;
1633         const struct nbrec_logical_port *nb;
1634     } *hash_node, *hash_node_next;
1635
1636     VLOG_DBG("Recalculating port up states for ovn-nb db.");
1637
1638     hmap_init(&lports_hmap);
1639
1640     NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1641         hash_node = xzalloc(sizeof *hash_node);
1642         hash_node->nb = nb;
1643         hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1644     }
1645
1646     SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
1647         nb = NULL;
1648         HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1649                                 hash_string(sb->logical_port, 0),
1650                                 &lports_hmap) {
1651             if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1652                 nb = hash_node->nb;
1653                 break;
1654             }
1655         }
1656
1657         if (!nb) {
1658             /* The logical port doesn't exist for this port binding.  This can
1659              * happen under normal circumstances when ovn-northd hasn't gotten
1660              * around to pruning the Port_Binding yet. */
1661             continue;
1662         }
1663
1664         if (sb->chassis && (!nb->up || !*nb->up)) {
1665             bool up = true;
1666             nbrec_logical_port_set_up(nb, &up, 1);
1667         } else if (!sb->chassis && (!nb->up || *nb->up)) {
1668             bool up = false;
1669             nbrec_logical_port_set_up(nb, &up, 1);
1670         }
1671     }
1672
1673     HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1674         hmap_remove(&lports_hmap, &hash_node->node);
1675         free(hash_node);
1676     }
1677     hmap_destroy(&lports_hmap);
1678 }
1679 \f
1680
1681 static char *default_db_;
1682
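     /* Returns the default database remote, "unix:<rundir>/db.sock", computing
      * it on first use and caching it in 'default_db_'. */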
1683 static const char *
1684 default_db(void)
1685 {
1686     if (!default_db_) {
1687         default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
1688     }
1689     return default_db_;
1690 }
1691
1692 static void
1693 parse_options(int argc, char *argv[])
1694 {
1695     enum {
1696         DAEMON_OPTION_ENUMS,
1697         VLOG_OPTION_ENUMS,
1698     };
1699     static const struct option long_options[] = {
1700         {"ovnsb-db", required_argument, NULL, 'd'},
1701         {"ovnnb-db", required_argument, NULL, 'D'},
1702         {"help", no_argument, NULL, 'h'},
1703         {"options", no_argument, NULL, 'o'},
1704         {"version", no_argument, NULL, 'V'},
1705         DAEMON_LONG_OPTIONS,
1706         VLOG_LONG_OPTIONS,
1707         STREAM_SSL_LONG_OPTIONS,
1708         {NULL, 0, NULL, 0},
1709     };
1710     char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1711
1712     for (;;) {
1713         int c;
1714
1715         c = getopt_long(argc, argv, short_options, long_options, NULL);
1716         if (c == -1) {
1717             break;
1718         }
1719
1720         switch (c) {
1721         DAEMON_OPTION_HANDLERS;
1722         VLOG_OPTION_HANDLERS;
1723         STREAM_SSL_OPTION_HANDLERS;
1724
1725         case 'd':
1726             ovnsb_db = optarg;
1727             break;
1728
1729         case 'D':
1730             ovnnb_db = optarg;
1731             break;
1732
1733         case 'h':
1734             usage();
1735             exit(EXIT_SUCCESS);
1736
1737         case 'o':
1738             ovs_cmdl_print_options(long_options);
1739             exit(EXIT_SUCCESS);
1740
1741         case 'V':
1742             ovs_print_version(0, 0);
1743             exit(EXIT_SUCCESS);
1744
1745         default:
1746             break;
1747         }
1748     }
1749
1750     if (!ovnsb_db) {
1751         ovnsb_db = default_db();
1752     }
1753
1754     if (!ovnnb_db) {
1755         ovnnb_db = default_db();
1756     }
1757
1758     free(short_options);
1759 }
1760
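     /* Registers 'column' with 'idl' but suppresses alerts when it changes.
      * These are the columns that ovn-northd itself writes, so presumably it
      * has no need to wake up for its own updates. */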
1761 static void
1762 add_column_noalert(struct ovsdb_idl *idl,
1763                    const struct ovsdb_idl_column *column)
1764 {
1765     ovsdb_idl_add_column(idl, column);
1766     ovsdb_idl_omit_alert(idl, column);
1767 }
1768
1769 int
1770 main(int argc, char *argv[])
1771 {
1772     extern struct vlog_module VLM_reconnect;
1773     struct ovsdb_idl *ovnnb_idl, *ovnsb_idl;
1774     unsigned int ovnnb_seqno, ovn_seqno;
1775     int res = EXIT_SUCCESS;
1776     struct northd_context ctx = {
1777         .ovnsb_txn = NULL,
1778     };
1779     bool ovnnb_changes_pending = false;
1780     bool ovn_changes_pending = false;
1781     struct unixctl_server *unixctl;
1782     int retval;
1783     bool exiting;
1784
1785     fatal_ignore_sigpipe();
1786     set_program_name(argv[0]);
1787     service_start(&argc, &argv);
1788     vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
1789     vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
1790     parse_options(argc, argv);
1791
1792     daemonize_start(false);
1793
1794     retval = unixctl_server_create(NULL, &unixctl);
1795     if (retval) {
1796         exit(EXIT_FAILURE);
1797     }
1798     unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
1799
1800     daemonize_complete();
1801
1802     nbrec_init();
1803     sbrec_init();
1804
1805     /* We want to detect all changes to the ovn-nb db. */
1806     ctx.ovnnb_idl = ovnnb_idl = ovsdb_idl_create(ovnnb_db,
1807             &nbrec_idl_class, true, true);
1808
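         /* For the ovn-sb db, monitor only the tables and columns explicitly
          * added below. */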
1809     ctx.ovnsb_idl = ovnsb_idl = ovsdb_idl_create(ovnsb_db,
1810             &sbrec_idl_class, false, true);
1811
1812     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_logical_flow);
1813     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_logical_datapath);
1814     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_pipeline);
1815     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_table_id);
1816     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_priority);
1817     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_match);
1818     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_actions);
1819
1820     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_multicast_group);
1821     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_datapath);
1822     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_tunnel_key);
1823     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_name);
1824     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_ports);
1825
1826     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_datapath_binding);
1827     add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_tunnel_key);
1828     add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_external_ids);
1829
1830     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_port_binding);
1831     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_datapath);
1832     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_logical_port);
1833     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tunnel_key);
1834     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_parent_port);
1835     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tag);
1836     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_type);
1837     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_options);
1838     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_mac);
1839     ovsdb_idl_add_column(ovnsb_idl, &sbrec_port_binding_col_chassis);
1840
1841     /*
1842      * The main loop runs both IDLs and waits for their sequence numbers to
1843      * change, which indicates that the contents of a database have changed.
1844      *
1845      * If the contents of the ovn-nb db change, the mappings to the ovn-sb
1846      * db must be recalculated.
1847      *
1848      * If the contents of the ovn-sb db change, a port's 'chassis' binding may
1849      * have changed, so its 'up' state in the ovn-nb db may need updating;
1850      * that is the only type of ovn-sb change ovn-northd watches for.
1851      */
1852
1853     ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1854     ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1855     exiting = false;
1856     while (!exiting) {
1857         ovsdb_idl_run(ovnnb_idl);
1858         ovsdb_idl_run(ovnsb_idl);
1859         unixctl_server_run(unixctl);
1860
1861         if (!ovsdb_idl_is_alive(ovnnb_idl)) {
1862             int retval = ovsdb_idl_get_last_error(ovnnb_idl);
1863             VLOG_ERR("%s: database connection failed (%s)",
1864                     ovnnb_db, ovs_retval_to_string(retval));
1865             res = EXIT_FAILURE;
1866             break;
1867         }
1868
1869         if (!ovsdb_idl_is_alive(ovnsb_idl)) {
1870             int retval = ovsdb_idl_get_last_error(ovnsb_idl);
1871             VLOG_ERR("%s: database connection failed (%s)",
1872                     ovnsb_db, ovs_retval_to_string(retval));
1873             res = EXIT_FAILURE;
1874             break;
1875         }
1876
1877         if (ovnnb_seqno != ovsdb_idl_get_seqno(ovnnb_idl)) {
1878             ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1879             ovnnb_changes_pending = true;
1880         }
1881
1882         if (ovn_seqno != ovsdb_idl_get_seqno(ovnsb_idl)) {
1883             ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1884             ovn_changes_pending = true;
1885         }
1886
1887         /*
1888          * If there are any pending changes, we delay recalculating the
1889          * necessary updates until after an existing transaction finishes.
1890          * This avoids the possibility of rapid updates causing ovn-northd to
1891          * never be able to successfully make the corresponding updates to the
1892          * other db.  Instead, pending changes are batched up until the next
1893          * time we get a chance to calculate the new state and apply it.
1894          */
1895
1896         if (ovnnb_changes_pending && !ctx.ovnsb_txn) {
1897             /*
1898              * The OVN-nb db contents have changed, so create a transaction for
1899              * updating the OVN-sb DB.
1900              */
1901             ctx.ovnsb_txn = ovsdb_idl_txn_create(ctx.ovnsb_idl);
1902             ovsdb_idl_txn_add_comment(ctx.ovnsb_txn,
1903                                       "ovn-northd: northbound db changed");
1904             ovnnb_db_changed(&ctx);
1905             ovnnb_changes_pending = false;
1906         }
1907
1908         if (ovn_changes_pending && !ctx.ovnnb_txn) {
1909             /*
1910              * The OVN-sb db contents have changed, so create a transaction for
1911              * updating the northbound DB.
1912              */
1913             ctx.ovnnb_txn = ovsdb_idl_txn_create(ctx.ovnnb_idl);
1914             ovsdb_idl_txn_add_comment(ctx.ovnnb_txn,
1915                                       "ovn-northd: southbound db changed");
1916             ovnsb_db_changed(&ctx);
1917             ovn_changes_pending = false;
1918         }
1919
1920         if (ctx.ovnnb_txn) {
1921             enum ovsdb_idl_txn_status txn_status;
1922             txn_status = ovsdb_idl_txn_commit(ctx.ovnnb_txn);
1923             switch (txn_status) {
1924             case TXN_UNCOMMITTED:
1925             case TXN_INCOMPLETE:
1926                 /* Come back around and try to commit this transaction again. */
1927                 break;
1928             case TXN_ABORTED:
1929             case TXN_TRY_AGAIN:
1930             case TXN_NOT_LOCKED:
1931             case TXN_ERROR:
1932                 /* Something went wrong, so try creating a new transaction. */
1933                 ovn_changes_pending = true;  /* Fall through. */
1934             case TXN_UNCHANGED:
1935             case TXN_SUCCESS:
1936                 ovsdb_idl_txn_destroy(ctx.ovnnb_txn);
1937                 ctx.ovnnb_txn = NULL;
1938             }
1939         }
1940
1941         if (ctx.ovnsb_txn) {
1942             enum ovsdb_idl_txn_status txn_status;
1943             txn_status = ovsdb_idl_txn_commit(ctx.ovnsb_txn);
1944             switch (txn_status) {
1945             case TXN_UNCOMMITTED:
1946             case TXN_INCOMPLETE:
1947                 /* Come back around and try to commit this transaction again. */
1948                 break;
1949             case TXN_ABORTED:
1950             case TXN_TRY_AGAIN:
1951             case TXN_NOT_LOCKED:
1952             case TXN_ERROR:
1953                 /* Something went wrong, so try creating a new transaction. */
1954                 ovnnb_changes_pending = true;  /* Fall through. */
1955             case TXN_UNCHANGED:
1956             case TXN_SUCCESS:
1957                 ovsdb_idl_txn_destroy(ctx.ovnsb_txn);
1958                 ctx.ovnsb_txn = NULL;
1959             }
1960         }
1961
1962         if (ovnnb_seqno == ovsdb_idl_get_seqno(ovnnb_idl) &&
1963                 ovn_seqno == ovsdb_idl_get_seqno(ovnsb_idl)) {
1964             ovsdb_idl_wait(ovnnb_idl);
1965             ovsdb_idl_wait(ovnsb_idl);
1966             if (ctx.ovnnb_txn) {
1967                 ovsdb_idl_txn_wait(ctx.ovnnb_txn);
1968             }
1969             if (ctx.ovnsb_txn) {
1970                 ovsdb_idl_txn_wait(ctx.ovnsb_txn);
1971             }
1972             unixctl_server_wait(unixctl);
1973             if (exiting) {
1974                 poll_immediate_wake();
1975             }
1976             poll_block();
1977         }
1978         if (should_service_stop()) {
1979             exiting = true;
1980         }
1981     }
1982
1983     unixctl_server_destroy(unixctl);
1984     ovsdb_idl_destroy(ovnsb_idl);
1985     ovsdb_idl_destroy(ovnnb_idl);
1986     service_stop();
1987
1988     free(default_db_);
1989
1990     exit(res);
1991 }
1992
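     /* unixctl callback for the "exit" command: signals the main loop to
      * terminate. */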
1993 static void
1994 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
1995                 const char *argv[] OVS_UNUSED, void *exiting_)
1996 {
1997     bool *exiting = exiting_;
1998     *exiting = true;
1999
2000     unixctl_command_reply(conn, NULL);
2001 }