2 * Copyright (c) 2014 Red Hat, Inc.
4 * Based on mac-learning implementation.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
20 #include "mcast-snooping.h"
26 #include "byte-order.h"
30 #include "poll-loop.h"
33 #include "unaligned.h"
35 #include "vlan-bitmap.h"
36 #include "openvswitch/vlog.h"
/* Coverage counters used in this file: 'mcast_snooping_learned' is bumped
 * with COVERAGE_INC() when a group or multicast router is newly recorded,
 * 'mcast_snooping_expired' with COVERAGE_ADD() when entries age out. */
38 COVERAGE_DEFINE(mcast_snooping_learned);
39 COVERAGE_DEFINE(mcast_snooping_expired);
/* Forward declarations of the two lookup helpers, which are called by
 * mcast_snooping_add_group() before their definitions further down. */
41 static struct mcast_port_bundle *
42 mcast_snooping_port_lookup(struct ovs_list *list, void *port);
43 static struct mcast_mrouter_bundle *
44 mcast_snooping_mrouter_lookup(struct mcast_snooping *ms, uint16_t vlan,
46 OVS_REQ_RDLOCK(ms->rwlock);
/* Reports whether multicast snooping is enabled for 'ms'.  The entry points
 * in this file that take 'ms->rwlock' themselves (mdb_flush, flush, run and
 * wait), as well as mcast_snooping_unref(), test the negation of this result
 * before doing anything else.
 * NOTE(review): the body is not part of this excerpt -- confirm the exact
 * criterion against the full file. */
49 mcast_snooping_enabled(const struct mcast_snooping *ms)
/* Returns the 'flood_unreg' flag of 'ms': true from creation (see
 * mcast_snooping_create()) until it is changed by
 * mcast_snooping_set_flood_unreg(). */
55 mcast_snooping_flood_unreg(const struct mcast_snooping *ms)
57 return ms->flood_unreg;
/* Tests whether 'igmp_type' is an IGMP membership query.  'igmp_type' is an
 * ovs_be16, so the constant is converted with htons() before the
 * comparison. */
61 mcast_snooping_is_query(ovs_be16 igmp_type)
63 return igmp_type == htons(IGMP_HOST_MEMBERSHIP_QUERY);
/* Classifies 'igmp_type' after converting it to host byte order with
 * ntohs().  The message types handled by the visible cases are
 * IGMP_HOST_MEMBERSHIP_REPORT, IGMPV2_HOST_MEMBERSHIP_REPORT and
 * IGMP_HOST_LEAVE_MESSAGE.
 * NOTE(review): the return statements are not part of this excerpt;
 * presumably these cases report true and everything else false -- confirm. */
67 mcast_snooping_is_membership(ovs_be16 igmp_type)
69 switch (ntohs(igmp_type)) {
70 case IGMP_HOST_MEMBERSHIP_REPORT:
71 case IGMPV2_HOST_MEMBERSHIP_REPORT:
72 case IGMP_HOST_LEAVE_MESSAGE:
78 /* Returns the number of seconds since multicast group bundle 'b' was
 * learned (or last refreshed) in a port. */
/* Age of bundle 'b' in seconds.  'b->expires' is set to "now + idle time"
 * whenever the bundle is inserted or refreshed (see
 * mcast_group_insert_bundle()), so the idle time minus the time still
 * remaining is the time elapsed since then. */
81 mcast_bundle_age(const struct mcast_snooping *ms,
82 const struct mcast_group_bundle *b)
84 time_t remaining = b->expires - time_now();
85 return ms->idle_time - remaining;
/* Computes the hash under which a group is stored in and looked up from
 * 'ms->table': a three-word hash of the IPv4 group address 'grp_ip4', the
 * VLAN and 'ms->secret', the random value chosen when the table was
 * created. */
89 mcast_table_hash(const struct mcast_snooping *ms, ovs_be32 grp_ip4,
92 return hash_3words((OVS_FORCE uint32_t) grp_ip4, vlan, ms->secret);
/* Converts 'list', which must be the 'bundle_node' member embedded in a
 * struct mcast_group_bundle, back into a pointer to that bundle. */
95 static struct mcast_group_bundle *
96 mcast_group_bundle_from_lru_node(struct ovs_list *list)
98 return CONTAINER_OF(list, struct mcast_group_bundle, bundle_node);
/* Converts 'list', which must be the 'group_node' member embedded in a
 * struct mcast_group, back into a pointer to that group. */
101 static struct mcast_group *
102 mcast_group_from_lru_node(struct ovs_list *list)
104 return CONTAINER_OF(list, struct mcast_group, group_node);
107 /* Searches 'ms' for and returns the mcast group for IPv4 destination
108 * address 'dip' in 'vlan'.  The caller must hold 'ms->rwlock' for reading.
 * NOTE(review): the return statements are not part of this excerpt; callers
 * in this file test the result for null, so presumably NULL means "not
 * found" -- confirm. */
110 mcast_snooping_lookup(const struct mcast_snooping *ms, ovs_be32 dip,
112 OVS_REQ_RDLOCK(ms->rwlock)
114 struct mcast_group *grp;
/* Only the entries that share the hash of (dip, vlan, secret) are visited;
 * the exact comparison below filters out hash collisions. */
117 hash = mcast_table_hash(ms, dip, vlan);
118 HMAP_FOR_EACH_WITH_HASH (grp, hmap_node, hash, &ms->table) {
119 if (grp->vlan == vlan && grp->ip4 == dip) {
126 /* If the group LRU list is not empty, stores the least-recently-used group
127 * in '*grp' and returns true. Otherwise, if the LRU list is empty,
128 * stores NULL in '*grp' and returns false. */
130 group_get_lru(const struct mcast_snooping *ms, struct mcast_group **grp)
131 OVS_REQ_RDLOCK(ms->rwlock)
/* The head of 'group_lru' is the oldest entry: groups are re-appended at
 * the tail whenever they are used (see mcast_snooping_add_group()). */
133 if (!list_is_empty(&ms->group_lru)) {
134 *grp = mcast_group_from_lru_node(ms->group_lru.next);
/* Clamps 'idle_time' (in seconds) to the range accepted for the aging
 * timeout: values below 15 become 15 and values above 3600 become 3600.
 * NOTE(review): the last arm of the conditional is not part of this
 * excerpt; presumably in-range values are returned unchanged -- confirm. */
143 normalize_idle_time(unsigned int idle_time)
145 return (idle_time < 15 ? 15
146 : idle_time > 3600 ? 3600
150 /* Creates and returns a new mcast table with an initial mcast aging
151 * timeout of MCAST_ENTRY_DEFAULT_IDLE_TIME seconds and an initial maximum of
152 * MCAST_DEFAULT_MAX_ENTRIES entries. */
153 struct mcast_snooping *
154 mcast_snooping_create(void)
156 struct mcast_snooping *ms;
158 ms = xmalloc(sizeof *ms);
/* Start with an empty group table and empty LRU and port lists. */
159 hmap_init(&ms->table);
160 list_init(&ms->group_lru);
161 list_init(&ms->mrouter_lru);
162 list_init(&ms->fport_list);
163 list_init(&ms->rport_list);
/* Random per-table value mixed into every hash (see mcast_table_hash()). */
164 ms->secret = random_uint32();
165 ms->idle_time = MCAST_ENTRY_DEFAULT_IDLE_TIME;
166 ms->max_entries = MCAST_DEFAULT_MAX_ENTRIES;
167 ms->need_revalidate = false;
/* Unregistered multicast is flooded until configured otherwise through
 * mcast_snooping_set_flood_unreg(). */
168 ms->flood_unreg = true;
169 ovs_refcount_init(&ms->ref_cnt);
170 ovs_rwlock_init(&ms->rwlock);
/* Takes an additional reference on 'ms_'.  The const qualifier is cast away
 * because the reference count is stored inside the structure itself.
 * NOTE(review): lines of this function are missing from this excerpt
 * (including the return statement); presumably the same table is returned
 * as a non-const pointer -- confirm. */
174 struct mcast_snooping *
175 mcast_snooping_ref(const struct mcast_snooping *ms_)
177 struct mcast_snooping *ms = CONST_CAST(struct mcast_snooping *, ms_);
179 ovs_refcount_ref(&ms->ref_cnt);
184 /* Unreferences (and possibly destroys) mcast snooping table 'ms'. */
186 mcast_snooping_unref(struct mcast_snooping *ms)
188 if (!mcast_snooping_enabled(ms)) {
/* When the unref call reports 1 the table is torn down: all entries and
 * ports are flushed, then the hash map and the lock are destroyed.
 * NOTE(review): presumably 1 is the count before the decrement, i.e. this
 * was the last reference -- confirm against the ovs_refcount API. */
192 if (ovs_refcount_unref_relaxed(&ms->ref_cnt) == 1) {
193 mcast_snooping_flush(ms);
194 hmap_destroy(&ms->table);
195 ovs_rwlock_destroy(&ms->rwlock);
200 /* Changes the mcast aging timeout of 'ms' to 'idle_time' seconds. */
/* The requested value is first clamped by normalize_idle_time().  The
 * caller must hold 'ms->rwlock' for writing. */
202 mcast_snooping_set_idle_time(struct mcast_snooping *ms, unsigned int idle_time)
203 OVS_REQ_WRLOCK(ms->rwlock)
205 struct mcast_group *grp;
206 struct mcast_group_bundle *b;
209 idle_time = normalize_idle_time(idle_time);
210 if (idle_time != ms->idle_time) {
/* 'delta' is the signed change of the timeout.  Every bundle of every
 * group is visited.
 * NOTE(review): the loop body is not part of this excerpt; presumably it
 * shifts each bundle's expiration by 'delta' -- confirm. */
211 delta = (int) idle_time - (int) ms->idle_time;
212 LIST_FOR_EACH (grp, group_node, &ms->group_lru) {
213 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
217 ms->idle_time = idle_time;
221 /* Sets the maximum number of entries in 'ms' to 'max_entries', adjusting it
222 * to be within a reasonable range: at least 10 and at most 1000 * 1000.
 * The caller must hold 'ms->rwlock' for writing. */
224 mcast_snooping_set_max_entries(struct mcast_snooping *ms,
226 OVS_REQ_WRLOCK(ms->rwlock)
228 ms->max_entries = (max_entries < 10 ? 10
229 : max_entries > 1000 * 1000 ? 1000 * 1000
233 /* Sets whether unregistered multicast packets should be flooded to
234 * all ports or only to ports connected to multicast routers.
 * The new setting is stored in 'ms->flood_unreg'.
 *
236 * Returns true if the previous state differs from the new state,
237 * false otherwise. */
239 mcast_snooping_set_flood_unreg(struct mcast_snooping *ms, bool enable)
240 OVS_REQ_WRLOCK(ms->rwlock)
242 bool prev = ms->flood_unreg;
243 ms->flood_unreg = enable;
244 return prev != enable;
/* Searches the bundle list of 'grp' for the bundle whose 'port' equals
 * 'port'.  'ms' is referenced only by the lock annotation.
 * NOTE(review): the return statements are not part of this excerpt;
 * presumably the matching bundle is returned, or NULL when none matches. */
247 static struct mcast_group_bundle *
248 mcast_group_bundle_lookup(struct mcast_snooping *ms OVS_UNUSED,
249 struct mcast_group *grp, void *port)
250 OVS_REQ_RDLOCK(ms->rwlock)
252 struct mcast_group_bundle *b;
254 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
255 if (b->port == port) {
262 /* Inserts a new bundle for 'port' into the mcast group 'grp', or updates its
263 * position and expiration if it is already there. */
264 static struct mcast_group_bundle *
265 mcast_group_insert_bundle(struct mcast_snooping *ms OVS_UNUSED,
266 struct mcast_group *grp, void *port, int idle_time)
267 OVS_REQ_WRLOCK(ms->rwlock)
269 struct mcast_group_bundle *b;
271 b = mcast_group_bundle_lookup(ms, grp, port);
/* Existing bundle: unlink it so that it can be re-appended at the tail. */
273 list_remove(&b->bundle_node);
/* New bundle: allocate it and start with a self-contained list node. */
275 b = xmalloc(sizeof *b);
276 list_init(&b->bundle_node);
/* (Re)start the expiration timer.  Appending at the tail keeps
 * 'bundle_lru' ordered by expiration time. */
280 b->expires = time_now() + idle_time;
281 list_push_back(&grp->bundle_lru, &b->bundle_node);
285 /* Returns true if multicast group 'grp' still has bundles associated.
286 * Returns false if there are no bundles. */
288 mcast_group_has_bundles(struct mcast_group *grp)
290 return !list_is_empty(&grp->bundle_lru);
293 /* Deletes 'grp' from the 'ms' hash table and from the group LRU list.
294 * The caller is responsible for emptying the bundle LRU first; this is
 * asserted below. */
296 mcast_snooping_flush_group__(struct mcast_snooping *ms,
297 struct mcast_group *grp)
299 ovs_assert(list_is_empty(&grp->bundle_lru));
300 hmap_remove(&ms->table, &grp->hmap_node);
301 list_remove(&grp->group_node);
305 /* Flushes out mcast group 'grp' and its bundles, and marks 'ms' as needing
 * revalidation.  The caller must hold 'ms->rwlock' for writing. */
307 mcast_snooping_flush_group(struct mcast_snooping *ms, struct mcast_group *grp)
308 OVS_REQ_WRLOCK(ms->rwlock)
310 struct mcast_group_bundle *b;
/* Pop every bundle off the group's list.
 * NOTE(review): the loop body is not part of this excerpt; presumably each
 * popped bundle is released there -- confirm. */
312 LIST_FOR_EACH_POP (b, bundle_node, &grp->bundle_lru) {
/* The bundle list is empty now, which the helper below requires. */
315 mcast_snooping_flush_group__(ms, grp);
316 ms->need_revalidate = true;
320 /* Deletes the bundle of 'grp' that belongs to 'port', returning true if it
321 * succeeds, false if it didn't find the bundle. */
323 mcast_group_delete_bundle(struct mcast_snooping *ms OVS_UNUSED,
324 struct mcast_group *grp, void *port)
325 OVS_REQ_WRLOCK(ms->rwlock)
327 struct mcast_group_bundle *b;
329 LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
330 if (b->port == port) {
/* Unlink the matching bundle from the group's list. */
331 list_remove(&b->bundle_node);
339 /* If any bundle of 'grp' has expired, deletes it.  Returns the number of
 * deleted bundles. */
/* Walks 'grp->bundle_lru' and unlinks bundles that are past their
 * expiration time.  A group left without bundles is removed from 'ms'.
 * 'ms->need_revalidate' is set and the 'mcast_snooping_expired' coverage
 * counter is increased by 'expired'.  The caller must hold 'ms->rwlock' for
 * writing. */
342 mcast_snooping_prune_expired(struct mcast_snooping *ms,
343 struct mcast_group *grp)
344 OVS_REQ_WRLOCK(ms->rwlock)
347 struct mcast_group_bundle *b, *next_b;
348 time_t timenow = time_now();
351 LIST_FOR_EACH_SAFE (b, next_b, bundle_node, &grp->bundle_lru) {
352 /* This list is sorted on expiration time. */
353 if (b->expires > timenow) {
356 list_remove(&b->bundle_node);
/* A group whose last bundle was removed is dropped from 'ms'. */
361 if (!mcast_group_has_bundles(grp)) {
362 mcast_snooping_flush_group__(ms, grp);
367 ms->need_revalidate = true;
368 COVERAGE_ADD(mcast_snooping_expired, expired);
374 /* Adds a multicast group to the mdb. If it already exists, then
375 * moves it to the last position in the LRU list. */
/* Records 'port' as a member of group 'ip4' in 'vlan', creating the group if
 * needed, and marks the group most recently used.  The caller must hold
 * 'ms->rwlock' for writing. */
378 mcast_snooping_add_group(struct mcast_snooping *ms, ovs_be32 ip4,
379 uint16_t vlan, void *port)
380 OVS_REQ_WRLOCK(ms->rwlock)
383 struct mcast_group *grp;
385 /* Avoid duplicate packets. */
386 if (mcast_snooping_mrouter_lookup(ms, vlan, port)
387 || mcast_snooping_port_lookup(&ms->fport_list, port)) {
392 grp = mcast_snooping_lookup(ms, ip4, vlan);
394 uint32_t hash = mcast_table_hash(ms, ip4, vlan);
/* Table full: evict the least-recently-used group to make room. */
396 if (hmap_count(&ms->table) >= ms->max_entries) {
397 group_get_lru(ms, &grp);
398 mcast_snooping_flush_group(ms, grp);
/* Create the new group and register it in the hash table. */
401 grp = xmalloc(sizeof *grp);
402 hmap_insert(&ms->table, &grp->hmap_node, hash);
405 list_init(&grp->bundle_lru);
407 ms->need_revalidate = true;
408 COVERAGE_INC(mcast_snooping_learned);
/* Unlink the group from the LRU list; it is re-appended at the tail
 * below. */
410 list_remove(&grp->group_node);
412 mcast_group_insert_bundle(ms, grp, port, ms->idle_time);
414 /* Mark 'grp' as recently used. */
415 list_push_back(&ms->group_lru, &grp->group_node);
/* Handles 'port' leaving group 'ip4' in 'vlan' by deleting the port's bundle
 * from that group, if both exist.  The caller must hold 'ms->rwlock' for
 * writing. */
420 mcast_snooping_leave_group(struct mcast_snooping *ms, ovs_be32 ip4,
421 uint16_t vlan, void *port)
422 OVS_REQ_WRLOCK(ms->rwlock)
424 struct mcast_group *grp;
426 /* Ports flagged to forward Reports usually have more
427 * than one host behind them, so don't leave the group
428 * on the first message and just let it expire. */
429 if (mcast_snooping_port_lookup(&ms->rport_list, port)) {
433 grp = mcast_snooping_lookup(ms, ip4, vlan);
/* Flows need revalidation only when a bundle was actually deleted. */
434 if (grp && mcast_group_delete_bundle(ms, grp, port)) {
435 ms->need_revalidate = true;
444 /* Returns the number of seconds since the multicast router 'mrouter'
445 * was learned (or last refreshed) in a port.  'ms' is unused. */
447 mcast_mrouter_age(const struct mcast_snooping *ms OVS_UNUSED,
448 const struct mcast_mrouter_bundle *mrouter)
/* 'expires' is set MCAST_MROUTER_PORT_IDLE_TIME seconds ahead by
 * mcast_snooping_add_mrouter(), so that constant minus the time remaining
 * is the age. */
450 time_t remaining = mrouter->expires - time_now();
451 return MCAST_MROUTER_PORT_IDLE_TIME - remaining;
/* Converts 'list', which must be the 'mrouter_node' member embedded in a
 * struct mcast_mrouter_bundle, back into a pointer to that bundle. */
454 static struct mcast_mrouter_bundle *
455 mcast_mrouter_from_lru_node(struct ovs_list *list)
457 return CONTAINER_OF(list, struct mcast_mrouter_bundle, mrouter_node);
460 /* If the mrouter LRU list is not empty, stores the least-recently-used
461 * mrouter in '*m' and returns true. Otherwise, if the LRU list is empty,
462 * stores NULL in '*m' and returns false. */
464 mrouter_get_lru(const struct mcast_snooping *ms,
465 struct mcast_mrouter_bundle **m)
466 OVS_REQ_RDLOCK(ms->rwlock)
/* The head of 'mrouter_lru' is the oldest entry: routers are re-appended
 * at the tail whenever they are seen (see mcast_snooping_add_mrouter()). */
468 if (!list_is_empty(&ms->mrouter_lru)) {
469 *m = mcast_mrouter_from_lru_node(ms->mrouter_lru.next);
/* Searches the multicast router list of 'ms' for the entry matching both
 * 'vlan' and 'port'.  The caller must hold 'ms->rwlock' for reading.
 * NOTE(review): the return statements are not part of this excerpt;
 * presumably the matching entry is returned, or NULL when none matches. */
477 static struct mcast_mrouter_bundle *
478 mcast_snooping_mrouter_lookup(struct mcast_snooping *ms, uint16_t vlan,
480 OVS_REQ_RDLOCK(ms->rwlock)
482 struct mcast_mrouter_bundle *mrouter;
484 LIST_FOR_EACH (mrouter, mrouter_node, &ms->mrouter_lru) {
485 if (mrouter->vlan == vlan && mrouter->port == port) {
/* Records that a multicast router was seen on 'port' in 'vlan', or refreshes
 * the existing record.  The caller must hold 'ms->rwlock' for writing.
 *
 * The visible return statement yields 'ms->need_revalidate'.  That flag is
 * set here for a newly learned router, but it may also still be set from an
 * earlier, unrelated change. */
493 mcast_snooping_add_mrouter(struct mcast_snooping *ms, uint16_t vlan,
495 OVS_REQ_WRLOCK(ms->rwlock)
497 struct mcast_mrouter_bundle *mrouter;
499 /* Avoid duplicate packets. */
500 if (mcast_snooping_port_lookup(&ms->fport_list, port)) {
504 mrouter = mcast_snooping_mrouter_lookup(ms, vlan, port);
/* Known router: unlink it so that it can be re-appended at the tail. */
506 list_remove(&mrouter->mrouter_node);
/* New router: allocate and fill in a fresh record. */
508 mrouter = xmalloc(sizeof *mrouter);
509 mrouter->vlan = vlan;
510 mrouter->port = port;
511 COVERAGE_INC(mcast_snooping_learned);
512 ms->need_revalidate = true;
/* (Re)start the expiration timer and mark the router most recently used. */
515 mrouter->expires = time_now() + MCAST_MROUTER_PORT_IDLE_TIME;
516 list_push_back(&ms->mrouter_lru, &mrouter->mrouter_node);
517 return ms->need_revalidate;
/* Unlinks 'mrouter' from the list it is on.
 * NOTE(review): the remaining lines of this function are not part of this
 * excerpt; presumably the record is also released -- confirm. */
521 mcast_snooping_flush_mrouter(struct mcast_mrouter_bundle *mrouter)
523 list_remove(&mrouter->mrouter_node);
/* Converts 'list', which must be the 'node' member embedded in a
 * struct mcast_port_bundle, back into a pointer to that bundle. */
529 static struct mcast_port_bundle *
530 mcast_port_from_list_node(struct ovs_list *list)
532 return CONTAINER_OF(list, struct mcast_port_bundle, node);
535 /* If 'list' is not empty, stores its first port bundle in '*f' and returns
536 * true.  Otherwise, if the list is empty, stores NULL in '*f' and returns
 * false.  Used for both the flood port list and the flood-reports port
 * list. */
538 mcast_snooping_port_get(const struct ovs_list *list,
539 struct mcast_port_bundle **f)
541 if (!list_is_empty(list)) {
542 *f = mcast_port_from_list_node(list->next);
/* Searches the port bundle list 'list' for the entry whose 'port' equals
 * 'port'.
 * NOTE(review): the return statements are not part of this excerpt;
 * presumably the matching entry is returned, or NULL when none matches. */
550 static struct mcast_port_bundle *
551 mcast_snooping_port_lookup(struct ovs_list *list, void *port)
553 struct mcast_port_bundle *pbundle;
555 LIST_FOR_EACH (pbundle, node, list) {
556 if (pbundle->port == port) {
/* Allocates a port bundle for 'port' and links it into 'list'.  This
 * function does not check for duplicates; the callers in this file look
 * 'port' up first. */
564 mcast_snooping_add_port(struct ovs_list *list, void *port)
566 struct mcast_port_bundle *pbundle;
568 pbundle = xmalloc(sizeof *pbundle);
569 pbundle->port = port;
570 list_insert(list, &pbundle->node);
/* Unlinks 'pbundle' from the list it is on.
 * NOTE(review): the remaining lines of this function are not part of this
 * excerpt; presumably the bundle is also released -- confirm. */
574 mcast_snooping_flush_port(struct mcast_port_bundle *pbundle)
576 list_remove(&pbundle->node);
/* Adds 'port' to, or removes it from, the flood port list of 'ms' according
 * to 'flood'.  'ms->need_revalidate' is set only when the list actually
 * changes.  The caller must hold 'ms->rwlock' for writing. */
583 mcast_snooping_set_port_flood(struct mcast_snooping *ms, void *port,
585 OVS_REQ_WRLOCK(ms->rwlock)
587 struct mcast_port_bundle *fbundle;
589 fbundle = mcast_snooping_port_lookup(&ms->fport_list, port);
590 if (flood && !fbundle) {
591 mcast_snooping_add_port(&ms->fport_list, port);
592 ms->need_revalidate = true;
593 } else if (!flood && fbundle) {
594 mcast_snooping_flush_port(fbundle);
595 ms->need_revalidate = true;
599 /* Flood Reports ports. */
/* Adds 'port' to, or removes it from, the flood-reports port list of 'ms'
 * according to 'flood'.  'ms->need_revalidate' is set only when the list
 * actually changes.  The caller must hold 'ms->rwlock' for writing. */
602 mcast_snooping_set_port_flood_reports(struct mcast_snooping *ms, void *port,
604 OVS_REQ_WRLOCK(ms->rwlock)
606 struct mcast_port_bundle *pbundle;
608 pbundle = mcast_snooping_port_lookup(&ms->rport_list, port);
609 if (flood && !pbundle) {
610 mcast_snooping_add_port(&ms->rport_list, port);
611 ms->need_revalidate = true;
612 } else if (!flood && pbundle) {
613 mcast_snooping_flush_port(pbundle);
614 ms->need_revalidate = true;
/* Flushes every group and every multicast router from 'ms', leaving the
 * flood port lists untouched.  The caller must hold 'ms->rwlock' for
 * writing. */
621 mcast_snooping_mdb_flush__(struct mcast_snooping *ms)
622 OVS_REQ_WRLOCK(ms->rwlock)
624 struct mcast_group *grp;
625 struct mcast_mrouter_bundle *mrouter;
/* Keep flushing the oldest group until the LRU list is empty. */
627 while (group_get_lru(ms, &grp)) {
628 mcast_snooping_flush_group(ms, grp);
631 hmap_shrink(&ms->table);
633 while (mrouter_get_lru(ms, &mrouter)) {
634 mcast_snooping_flush_mrouter(mrouter);
/* Locking wrapper around mcast_snooping_mdb_flush__(): checks that snooping
 * is enabled for 'ms', then flushes groups and multicast routers with
 * 'ms->rwlock' held for writing. */
639 mcast_snooping_mdb_flush(struct mcast_snooping *ms)
641 if (!mcast_snooping_enabled(ms)) {
645 ovs_rwlock_wrlock(&ms->rwlock);
646 mcast_snooping_mdb_flush__(ms);
647 ovs_rwlock_unlock(&ms->rwlock);
650 /* Flushes the mdb (groups and multicast routers) as well as the flood
 * ports and flood-reports ports of 'ms'.  The caller must hold
 * 'ms->rwlock' for writing. */
652 mcast_snooping_flush__(struct mcast_snooping *ms)
653 OVS_REQ_WRLOCK(ms->rwlock)
655 struct mcast_group *grp;
656 struct mcast_mrouter_bundle *mrouter;
657 struct mcast_port_bundle *pbundle;
/* Keep flushing the oldest group until the LRU list is empty. */
659 while (group_get_lru(ms, &grp)) {
660 mcast_snooping_flush_group(ms, grp);
663 hmap_shrink(&ms->table);
665 /* flush multicast routers */
666 while (mrouter_get_lru(ms, &mrouter)) {
667 mcast_snooping_flush_mrouter(mrouter);
670 /* flush flood ports */
671 while (mcast_snooping_port_get(&ms->fport_list, &pbundle)) {
672 mcast_snooping_flush_port(pbundle);
675 /* flush flood report ports */
676 while (mcast_snooping_port_get(&ms->rport_list, &pbundle)) {
677 mcast_snooping_flush_port(pbundle);
/* Locking wrapper around mcast_snooping_flush__(): checks that snooping is
 * enabled for 'ms', then flushes groups, multicast routers and port lists
 * with 'ms->rwlock' held for writing. */
682 mcast_snooping_flush(struct mcast_snooping *ms)
684 if (!mcast_snooping_enabled(ms)) {
688 ovs_rwlock_wrlock(&ms->rwlock);
689 mcast_snooping_flush__(ms);
690 ovs_rwlock_unlock(&ms->rwlock);
/* Periodic maintenance, run with 'ms->rwlock' held for writing: flushes
 * groups while the table holds more than 'max_entries', prunes expired
 * bundles, and flushes expired multicast routers.  Returns the accumulated
 * 'need_revalidate' flag and clears it. */
694 mcast_snooping_run__(struct mcast_snooping *ms)
695 OVS_REQ_WRLOCK(ms->rwlock)
697 bool need_revalidate;
698 struct mcast_group *grp;
699 struct mcast_mrouter_bundle *mrouter;
/* Groups are examined starting from the least-recently-used one. */
702 while (group_get_lru(ms, &grp)) {
703 if (hmap_count(&ms->table) > ms->max_entries) {
704 mcast_snooping_flush_group(ms, grp);
/* NOTE(review): what happens when nothing was pruned is not part of this
 * excerpt; presumably the loop stops at the first group without expired
 * bundles -- confirm. */
706 if (!mcast_snooping_prune_expired(ms, grp)) {
712 hmap_shrink(&ms->table);
/* Routers are flushed oldest-first for as long as the head of the list
 * has reached its expiration time. */
715 while (mrouter_get_lru(ms, &mrouter)
716 && time_now() >= mrouter->expires) {
717 mcast_snooping_flush_mrouter(mrouter);
721 if (mrouter_expired) {
722 ms->need_revalidate = true;
723 COVERAGE_ADD(mcast_snooping_expired, mrouter_expired);
/* Hand the flag to the caller and reset it for the next run. */
726 need_revalidate = ms->need_revalidate;
727 ms->need_revalidate = false;
728 return need_revalidate;
731 /* Does periodic work required by 'ms'. Returns true if something changed
732 * that may require flow revalidation.  The work itself is done by
 * mcast_snooping_run__() with 'ms->rwlock' held for writing. */
734 mcast_snooping_run(struct mcast_snooping *ms)
736 bool need_revalidate;
738 if (!mcast_snooping_enabled(ms)) {
742 ovs_rwlock_wrlock(&ms->rwlock);
743 need_revalidate = mcast_snooping_run__(ms);
744 ovs_rwlock_unlock(&ms->rwlock);
746 return need_revalidate;
/* Arranges for the poll loop to wake up when 'ms' next needs attention.
 * The caller must hold 'ms->rwlock' for reading. */
750 mcast_snooping_wait__(struct mcast_snooping *ms)
751 OVS_REQ_RDLOCK(ms->rwlock)
/* Over the entry limit, or a revalidation is pending: wake up at once. */
753 if (hmap_count(&ms->table) > ms->max_entries
754 || ms->need_revalidate) {
755 poll_immediate_wake();
757 struct mcast_group *grp;
758 struct mcast_group_bundle *bundle;
759 struct mcast_mrouter_bundle *mrouter;
760 long long int mrouter_msec;
761 long long int msec = 0;
/* Group deadline: the first bundle of the least-recently-used group.
 * 'expires' is kept in seconds, hence the factor of 1000. */
763 if (!list_is_empty(&ms->group_lru)) {
764 grp = mcast_group_from_lru_node(ms->group_lru.next);
765 bundle = mcast_group_bundle_from_lru_node(grp->bundle_lru.next);
766 msec = bundle->expires * 1000LL;
/* Router deadline: take the sooner of the two, where 'msec' == 0 means
 * that there is no group deadline. */
769 if (!list_is_empty(&ms->mrouter_lru)) {
770 mrouter = mcast_mrouter_from_lru_node(ms->mrouter_lru.next);
771 mrouter_msec = mrouter->expires * 1000LL;
772 msec = msec ? MIN(msec, mrouter_msec) : mrouter_msec;
776 poll_timer_wait_until(msec);
/* Locking wrapper around mcast_snooping_wait__(): checks that snooping is
 * enabled for 'ms', then registers the next wake-up with 'ms->rwlock' held
 * for reading. */
782 mcast_snooping_wait(struct mcast_snooping *ms)
784 if (!mcast_snooping_enabled(ms)) {
788 ovs_rwlock_rdlock(&ms->rwlock);
789 mcast_snooping_wait__(ms);
790 ovs_rwlock_unlock(&ms->rwlock);