2 * Copyright (c) 2013, 2014 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "ovs-thread.h"
29 #include "netdev-dpdk.h"
31 #include "poll-loop.h"
33 #include "socket-util.h"
37 /* Omit the definitions in this file because they are somewhat difficult to
38 * write without prompting "sparse" complaints, without ugliness or
39 * cut-and-paste. Since "sparse" is just a checker, not a compiler, it
40 * doesn't matter that we don't define them. */
42 #include "openvswitch/vlog.h"
44 VLOG_DEFINE_THIS_MODULE(ovs_thread);
/* If there is a reason that we cannot fork anymore (unless the fork will be
 * immediately followed by an exec), then this points to a string that
 * explains why. */
49 static const char *must_not_fork;
51 /* True if we created any threads beyond the main initial thread. */
52 static bool multithreaded;
/* Defines ovs_<TYPE>_<FUN>_at(), a blocking wrapper around
 * pthread_<TYPE>_<FUN>().  The generated function aborts if the lock was never
 * initialized (its 'where' is null) or if the pthread call fails; on success
 * it records the caller-supplied 'where' in the lock. */
#define LOCK_FUNCTION(TYPE, FUN) \
    void \
    ovs_##TYPE##_##FUN##_at(const struct ovs_##TYPE *l_, \
                            const char *where) \
        OVS_NO_THREAD_SAFETY_ANALYSIS \
    { \
        struct ovs_##TYPE *lk = CONST_CAST(struct ovs_##TYPE *, l_); \
        int rc; \
 \
        /* An initialized lock always has a non-null 'where'. */ \
        if (OVS_UNLIKELY(!lk->where)) { \
            ovs_abort(0, "%s: %s() passed uninitialized ovs_"#TYPE, \
                      where, __func__); \
        } \
 \
        rc = pthread_##TYPE##_##FUN(&lk->lock); \
        if (OVS_UNLIKELY(rc)) { \
            ovs_abort(rc, "%s: pthread_%s_%s failed", where, #TYPE, #FUN); \
        } \
        lk->where = where; \
    }
75 LOCK_FUNCTION(mutex, lock);
76 LOCK_FUNCTION(rwlock, rdlock);
77 LOCK_FUNCTION(rwlock, wrlock);
/* Defines ovs_<TYPE>_<FUN>_at(), a non-blocking wrapper around
 * pthread_<TYPE>_<FUN>().  The generated function returns the pthread result:
 * 0 on success (in which case 'where' is recorded in the lock) or EBUSY.  Any
 * other error, or an uninitialized lock, aborts the process. */
#define TRY_LOCK_FUNCTION(TYPE, FUN) \
    int \
    ovs_##TYPE##_##FUN##_at(const struct ovs_##TYPE *l_, \
                            const char *where) \
        OVS_NO_THREAD_SAFETY_ANALYSIS \
    { \
        struct ovs_##TYPE *lk = CONST_CAST(struct ovs_##TYPE *, l_); \
        int rc; \
 \
        /* An initialized lock always has a non-null 'where'. */ \
        if (OVS_UNLIKELY(!lk->where)) { \
            ovs_abort(0, "%s: %s() passed uninitialized ovs_"#TYPE, \
                      where, __func__); \
        } \
 \
        rc = pthread_##TYPE##_##FUN(&lk->lock); \
        if (OVS_UNLIKELY(rc) && rc != EBUSY) { \
            ovs_abort(rc, "%s: pthread_%s_%s failed", where, #TYPE, #FUN); \
        } \
        if (rc == 0) { \
            lk->where = where; \
        } \
        return rc; \
    }
103 TRY_LOCK_FUNCTION(mutex, trylock);
104 TRY_LOCK_FUNCTION(rwlock, tryrdlock);
105 TRY_LOCK_FUNCTION(rwlock, trywrlock);
/* Defines ovs_<TYPE>_<FUN>(), a wrapper around pthread_<TYPE>_<FUN>() for
 * operations that release or destroy a lock.  The generated function asserts
 * that the lock was initialized, stores WHERE as the lock's new 'where'
 * (a null WHERE marks the lock as no longer initialized), and aborts if the
 * pthread call fails. */
#define UNLOCK_FUNCTION(TYPE, FUN, WHERE) \
    void \
    ovs_##TYPE##_##FUN(const struct ovs_##TYPE *l_) \
        OVS_NO_THREAD_SAFETY_ANALYSIS \
    { \
        struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \
        int error; \
 \
        /* Verify that 'l' was initialized. */ \
        ovs_assert(l->where); \
 \
        l->where = WHERE; \
        error = pthread_##TYPE##_##FUN(&l->lock); \
        if (OVS_UNLIKELY(error)) { \
            ovs_abort(error, "pthread_%s_%s failed", #TYPE, #FUN); \
        } \
    }
124 UNLOCK_FUNCTION(mutex, unlock, "<unlocked>");
125 UNLOCK_FUNCTION(mutex, destroy, NULL);
126 UNLOCK_FUNCTION(rwlock, unlock, "<unlocked>");
127 UNLOCK_FUNCTION(rwlock, destroy, NULL);
/* Defines x<FUNCTION>(), a one-argument wrapper that calls FUNCTION and aborts
 * the process, naming FUNCTION in the message, if it returns nonzero. */
#define XPTHREAD_FUNC1(FUNCTION, PARAM1) \
    void \
    x##FUNCTION(PARAM1 arg1) \
    { \
        int rc = FUNCTION(arg1); \
        if (OVS_UNLIKELY(rc)) { \
            ovs_abort(rc, "%s failed", #FUNCTION); \
        } \
    }
/* Defines x<FUNCTION>(), a two-argument wrapper that calls FUNCTION and aborts
 * the process, naming FUNCTION in the message, if it returns nonzero. */
#define XPTHREAD_FUNC2(FUNCTION, PARAM1, PARAM2) \
    void \
    x##FUNCTION(PARAM1 arg1, PARAM2 arg2) \
    { \
        int rc = FUNCTION(arg1, arg2); \
        if (OVS_UNLIKELY(rc)) { \
            ovs_abort(rc, "%s failed", #FUNCTION); \
        } \
    }
/* Defines x<FUNCTION>(), a three-argument wrapper that calls FUNCTION and
 * aborts the process, naming FUNCTION in the message, if it returns
 * nonzero. */
#define XPTHREAD_FUNC3(FUNCTION, PARAM1, PARAM2, PARAM3)\
    void \
    x##FUNCTION(PARAM1 arg1, PARAM2 arg2, PARAM3 arg3) \
    { \
        int rc = FUNCTION(arg1, arg2, arg3); \
        if (OVS_UNLIKELY(rc)) { \
            ovs_abort(rc, "%s failed", #FUNCTION); \
        } \
    }
157 XPTHREAD_FUNC1(pthread_mutex_lock, pthread_mutex_t *);
158 XPTHREAD_FUNC1(pthread_mutex_unlock, pthread_mutex_t *);
159 XPTHREAD_FUNC1(pthread_mutexattr_init, pthread_mutexattr_t *);
160 XPTHREAD_FUNC1(pthread_mutexattr_destroy, pthread_mutexattr_t *);
161 XPTHREAD_FUNC2(pthread_mutexattr_settype, pthread_mutexattr_t *, int);
162 XPTHREAD_FUNC2(pthread_mutexattr_gettype, pthread_mutexattr_t *, int *);
164 XPTHREAD_FUNC1(pthread_rwlockattr_init, pthread_rwlockattr_t *);
165 XPTHREAD_FUNC1(pthread_rwlockattr_destroy, pthread_rwlockattr_t *);
166 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
167 XPTHREAD_FUNC2(pthread_rwlockattr_setkind_np, pthread_rwlockattr_t *, int);
170 XPTHREAD_FUNC2(pthread_cond_init, pthread_cond_t *, pthread_condattr_t *);
171 XPTHREAD_FUNC1(pthread_cond_destroy, pthread_cond_t *);
172 XPTHREAD_FUNC1(pthread_cond_signal, pthread_cond_t *);
173 XPTHREAD_FUNC1(pthread_cond_broadcast, pthread_cond_t *);
175 XPTHREAD_FUNC2(pthread_join, pthread_t, void **);
177 typedef void destructor_func(void *);
178 XPTHREAD_FUNC2(pthread_key_create, pthread_key_t *, destructor_func *);
179 XPTHREAD_FUNC1(pthread_key_delete, pthread_key_t);
180 XPTHREAD_FUNC2(pthread_setspecific, pthread_key_t, const void *);
183 XPTHREAD_FUNC3(pthread_sigmask, int, const sigset_t *, sigset_t *);
187 ovs_mutex_init__(const struct ovs_mutex *l_, int type)
189 struct ovs_mutex *l = CONST_CAST(struct ovs_mutex *, l_);
190 pthread_mutexattr_t attr;
193 l->where = "<unlocked>";
194 xpthread_mutexattr_init(&attr);
195 xpthread_mutexattr_settype(&attr, type);
196 error = pthread_mutex_init(&l->lock, &attr);
197 if (OVS_UNLIKELY(error)) {
198 ovs_abort(error, "pthread_mutex_init failed");
200 xpthread_mutexattr_destroy(&attr);
203 /* Initializes 'mutex' as a normal (non-recursive) mutex. */
205 ovs_mutex_init(const struct ovs_mutex *mutex)
207 ovs_mutex_init__(mutex, PTHREAD_MUTEX_ERRORCHECK);
210 /* Initializes 'mutex' as a recursive mutex. */
212 ovs_mutex_init_recursive(const struct ovs_mutex *mutex)
214 ovs_mutex_init__(mutex, PTHREAD_MUTEX_RECURSIVE);
/* Initializes 'mutex' as an adaptive mutex (PTHREAD_MUTEX_ADAPTIVE_NP) when
 * the platform defines PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP.  On platforms
 * without adaptive mutexes, falls back to ovs_mutex_init(). */
void
ovs_mutex_init_adaptive(const struct ovs_mutex *mutex)
{
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
    ovs_mutex_init__(mutex, PTHREAD_MUTEX_ADAPTIVE_NP);
#else
    ovs_mutex_init(mutex);
#endif
}
229 ovs_rwlock_init(const struct ovs_rwlock *l_)
231 struct ovs_rwlock *l = CONST_CAST(struct ovs_rwlock *, l_);
232 pthread_rwlockattr_t attr;
235 l->where = "<unlocked>";
237 xpthread_rwlockattr_init(&attr);
238 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
239 xpthread_rwlockattr_setkind_np(
240 &attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
242 error = pthread_rwlock_init(&l->lock, NULL);
243 if (OVS_UNLIKELY(error)) {
244 ovs_abort(error, "pthread_rwlock_init failed");
246 xpthread_rwlockattr_destroy(&attr);
250 ovs_mutex_cond_wait(pthread_cond_t *cond, const struct ovs_mutex *mutex_)
252 struct ovs_mutex *mutex = CONST_CAST(struct ovs_mutex *, mutex_);
255 ovsrcu_quiesce_start();
256 error = pthread_cond_wait(cond, &mutex->lock);
257 ovsrcu_quiesce_end();
259 if (OVS_UNLIKELY(error)) {
260 ovs_abort(error, "pthread_cond_wait failed");
264 /* Initializes the 'barrier'. 'size' is the number of threads
265 * expected to hit the barrier. */
267 ovs_barrier_init(struct ovs_barrier *barrier, uint32_t size)
269 barrier->size = size;
270 atomic_count_init(&barrier->count, 0);
271 barrier->seq = seq_create();
274 /* Destroys the 'barrier'. */
276 ovs_barrier_destroy(struct ovs_barrier *barrier)
278 seq_destroy(barrier->seq);
281 /* Makes the calling thread block on the 'barrier' until all
282 * 'barrier->size' threads hit the barrier.
283 * ovs_barrier provides the necessary acquire-release semantics to make
284 * the effects of prior memory accesses of all the participating threads
285 * visible on return and to prevent the following memory accesses to be
286 * reordered before the ovs_barrier_block(). */
288 ovs_barrier_block(struct ovs_barrier *barrier)
290 uint64_t seq = seq_read(barrier->seq);
293 orig = atomic_count_inc(&barrier->count);
294 if (orig + 1 == barrier->size) {
295 atomic_count_set(&barrier->count, 0);
296 /* seq_change() serves as a release barrier against the other threads,
297 * so the zeroed count is visible to them as they continue. */
298 seq_change(barrier->seq);
300 /* To prevent thread from waking up by other event,
301 * keeps waiting for the change of 'barrier->seq'. */
302 while (seq == seq_read(barrier->seq)) {
303 seq_wait(barrier->seq, seq);
309 DEFINE_EXTERN_PER_THREAD_DATA(ovsthread_id, 0);
311 struct ovsthread_aux {
312 void *(*start)(void *);
318 ovsthread_wrapper(void *aux_)
320 static atomic_count next_id = ATOMIC_COUNT_INIT(1);
322 struct ovsthread_aux *auxp = aux_;
323 struct ovsthread_aux aux;
326 id = atomic_count_inc(&next_id);
327 *ovsthread_id_get() = id;
332 /* The order of the following calls is important, because
333 * ovsrcu_quiesce_end() saves a copy of the thread name. */
334 set_subprogram_name("%s%u", aux.name, id);
335 ovsrcu_quiesce_end();
339 return aux.start(aux.arg);
342 /* Starts a thread that calls 'start(arg)'. Sets the thread's name to 'name'
343 * (suffixed by its ovsthread_id()). Returns the new thread's pthread_t. */
345 ovs_thread_create(const char *name, void *(*start)(void *), void *arg)
347 struct ovsthread_aux *aux;
351 forbid_forking("multiple threads exist");
352 multithreaded = true;
353 ovsrcu_quiesce_end();
355 aux = xmalloc(sizeof *aux);
358 ovs_strlcpy(aux->name, name, sizeof aux->name);
360 error = pthread_create(&thread, NULL, ovsthread_wrapper, aux);
362 ovs_abort(error, "pthread_create failed");
368 ovsthread_once_start__(struct ovsthread_once *once)
370 ovs_mutex_lock(&once->mutex);
371 /* Mutex synchronizes memory, so we get the current value of 'done'. */
375 ovs_mutex_unlock(&once->mutex);
380 ovsthread_once_done(struct ovsthread_once *once)
382 /* We need release semantics here, so that the following store may not
383 * be moved ahead of any of the preceding initialization operations.
384 * A release atomic_thread_fence provides that prior memory accesses
385 * will not be reordered to take place after the following store. */
386 atomic_thread_fence(memory_order_release);
388 ovs_mutex_unlock(&once->mutex);
392 single_threaded(void)
394 return !multithreaded;
397 /* Asserts that the process has not yet created any threads (beyond the initial
400 * ('where' is used in logging. Commonly one would use
401 * assert_single_threaded() to automatically provide the caller's source file
402 * and line number for 'where'.) */
404 assert_single_threaded_at(const char *where)
407 VLOG_FATAL("%s: attempted operation not allowed when multithreaded",
413 /* Forks the current process (checking that this is allowed). Aborts with
414 * VLOG_FATAL if fork() returns an error, and otherwise returns the value
415 * returned by fork().
417 * ('where' is used in logging. Commonly one would use xfork() to
418 * automatically provide the caller's source file and line number for
421 xfork_at(const char *where)
426 VLOG_FATAL("%s: attempted to fork but forking not allowed (%s)",
427 where, must_not_fork);
432 VLOG_FATAL("%s: fork failed (%s)", where, ovs_strerror(errno));
438 /* Notes that the process must not call fork() from now on, for the specified
439 * 'reason'. (The process may still fork() if it execs itself immediately
442 forbid_forking(const char *reason)
444 ovs_assert(reason != NULL);
445 must_not_fork = reason;
448 /* Returns true if the process is allowed to fork, false otherwise. */
452 return !must_not_fork;
455 /* ovsthread_stats. */
458 ovsthread_stats_init(struct ovsthread_stats *stats)
462 ovs_mutex_init(&stats->mutex);
463 for (i = 0; i < ARRAY_SIZE(stats->buckets); i++) {
464 stats->buckets[i] = NULL;
469 ovsthread_stats_destroy(struct ovsthread_stats *stats)
471 ovs_mutex_destroy(&stats->mutex);
475 ovsthread_stats_bucket_get(struct ovsthread_stats *stats,
476 void *(*new_bucket)(void))
478 unsigned int idx = ovsthread_id_self() & (ARRAY_SIZE(stats->buckets) - 1);
479 void *bucket = stats->buckets[idx];
481 ovs_mutex_lock(&stats->mutex);
482 bucket = stats->buckets[idx];
484 bucket = stats->buckets[idx] = new_bucket();
486 ovs_mutex_unlock(&stats->mutex);
492 ovs_thread_stats_next_bucket(const struct ovsthread_stats *stats, size_t i)
494 for (; i < ARRAY_SIZE(stats->buckets); i++) {
495 if (stats->buckets[i]) {
503 /* Parses /proc/cpuinfo for the total number of physical cores on this system
504 * across all CPU packages, not counting hyper-threads.
506 * Sets *n_cores to the total number of cores on this system, or 0 if the
507 * number cannot be determined. */
509 parse_cpuinfo(long int *n_cores)
511 static const char file_name[] = "/proc/cpuinfo";
513 uint64_t cpu = 0; /* Support up to 64 CPU packages on a single system. */
517 stream = fopen(file_name, "r");
519 VLOG_DBG("%s: open failed (%s)", file_name, ovs_strerror(errno));
523 while (fgets(line, sizeof line, stream)) {
526 /* Find the next CPU package. */
527 if (ovs_scan(line, "physical id%*[^:]: %u", &id)) {
529 VLOG_WARN("Counted over 64 CPU packages on this system. "
530 "Parsing %s for core count may be inaccurate.",
536 if (cpu & (1ULL << id)) {
537 /* We've already counted this package's cores. */
542 /* Find the number of cores for this package. */
543 while (fgets(line, sizeof line, stream)) {
546 if (ovs_scan(line, "cpu cores%*[^:]: %u", &count)) {
/* Returns the total number of cores on this system, or 0 if the number cannot
 * be determined.
 *
 * Tries not to count hyper-threads, but may be inaccurate - particularly on
 * platforms that do not provide /proc/cpuinfo, but also if /proc/cpuinfo is
 * formatted differently from the layout that parse_cpuinfo() expects. */
565 count_cpu_cores(void)
567 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
568 static long int n_cores;
570 if (ovsthread_once_start(&once)) {
572 parse_cpuinfo(&n_cores);
574 n_cores = sysconf(_SC_NPROCESSORS_ONLN);
578 GetSystemInfo(&sysinfo);
579 n_cores = sysinfo.dwNumberOfProcessors;
581 ovsthread_once_done(&once);
584 return n_cores > 0 ? n_cores : 0;
591 #define MAX_KEYS (L1_SIZE * L2_SIZE)
593 /* A piece of thread-specific data. */
594 struct ovsthread_key {
595 struct ovs_list list_node; /* In 'inuse_keys' or 'free_keys'. */
596 void (*destructor)(void *); /* Called at thread exit. */
598 /* Indexes into the per-thread array in struct ovsthread_key_slots.
599 * This key's data is stored in p1[index / L2_SIZE][index % L2_SIZE]. */
603 /* Per-thread data structure. */
604 struct ovsthread_key_slots {
605 struct ovs_list list_node; /* In 'slots_list'. */
609 /* Contains "struct ovsthread_key_slots *". */
610 static pthread_key_t tsd_key;
612 /* Guards data structures below. */
613 static struct ovs_mutex key_mutex = OVS_MUTEX_INITIALIZER;
/* 'inuse_keys' holds "struct ovsthread_key"s that have been created and not
 * yet destroyed.
 *
 * 'free_keys' holds "struct ovsthread_key"s that have been deleted and are
 * ready for reuse.  (We keep them around only to be able to easily locate
 * free indexes.)
 *
 * Together, 'inuse_keys' and 'free_keys' hold an ovsthread_key for every index
 * from 0 to n_keys - 1, inclusive. */
624 static struct ovs_list inuse_keys OVS_GUARDED_BY(key_mutex)
625 = OVS_LIST_INITIALIZER(&inuse_keys);
626 static struct ovs_list free_keys OVS_GUARDED_BY(key_mutex)
627 = OVS_LIST_INITIALIZER(&free_keys);
628 static unsigned int n_keys OVS_GUARDED_BY(key_mutex);
630 /* All existing struct ovsthread_key_slots. */
631 static struct ovs_list slots_list OVS_GUARDED_BY(key_mutex)
632 = OVS_LIST_INITIALIZER(&slots_list);
635 clear_slot(struct ovsthread_key_slots *slots, unsigned int index)
637 void **p2 = slots->p1[index / L2_SIZE];
639 void **valuep = &p2[index % L2_SIZE];
640 void *value = *valuep;
649 ovsthread_key_destruct__(void *slots_)
651 struct ovsthread_key_slots *slots = slots_;
652 struct ovsthread_key *key;
656 ovs_mutex_lock(&key_mutex);
657 list_remove(&slots->list_node);
658 LIST_FOR_EACH (key, list_node, &inuse_keys) {
659 void *value = clear_slot(slots, key->index);
660 if (value && key->destructor) {
661 key->destructor(value);
665 ovs_mutex_unlock(&key_mutex);
667 for (i = 0; i < n / L2_SIZE; i++) {
673 /* Initializes '*keyp' as a thread-specific data key. The data items are
674 * initially null in all threads.
676 * If a thread exits with non-null data, then 'destructor', if nonnull, will be
677 * called passing the final data value as its argument. 'destructor' must not
678 * call any thread-specific data functions in this API.
680 * This function is similar to xpthread_key_create(). */
682 ovsthread_key_create(ovsthread_key_t *keyp, void (*destructor)(void *))
684 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
685 struct ovsthread_key *key;
687 if (ovsthread_once_start(&once)) {
688 xpthread_key_create(&tsd_key, ovsthread_key_destruct__);
689 ovsthread_once_done(&once);
692 ovs_mutex_lock(&key_mutex);
693 if (list_is_empty(&free_keys)) {
694 key = xmalloc(sizeof *key);
695 key->index = n_keys++;
696 if (key->index >= MAX_KEYS) {
700 key = CONTAINER_OF(list_pop_back(&free_keys),
701 struct ovsthread_key, list_node);
703 list_push_back(&inuse_keys, &key->list_node);
704 key->destructor = destructor;
705 ovs_mutex_unlock(&key_mutex);
710 /* Frees 'key'. The destructor supplied to ovsthread_key_create(), if any, is
713 * This function is similar to xpthread_key_delete(). */
715 ovsthread_key_delete(ovsthread_key_t key)
717 struct ovsthread_key_slots *slots;
719 ovs_mutex_lock(&key_mutex);
721 /* Move 'key' from 'inuse_keys' to 'free_keys'. */
722 list_remove(&key->list_node);
723 list_push_back(&free_keys, &key->list_node);
725 /* Clear this slot in all threads. */
726 LIST_FOR_EACH (slots, list_node, &slots_list) {
727 clear_slot(slots, key->index);
730 ovs_mutex_unlock(&key_mutex);
734 ovsthread_key_lookup__(const struct ovsthread_key *key)
736 struct ovsthread_key_slots *slots;
739 slots = pthread_getspecific(tsd_key);
741 slots = xzalloc(sizeof *slots);
743 ovs_mutex_lock(&key_mutex);
744 pthread_setspecific(tsd_key, slots);
745 list_push_back(&slots_list, &slots->list_node);
746 ovs_mutex_unlock(&key_mutex);
749 p2 = slots->p1[key->index / L2_SIZE];
751 p2 = xzalloc(L2_SIZE * sizeof *p2);
752 slots->p1[key->index / L2_SIZE] = p2;
755 return &p2[key->index % L2_SIZE];
758 /* Sets the value of thread-specific data item 'key', in the current thread, to
761 * This function is similar to pthread_setspecific(). */
763 ovsthread_setspecific(ovsthread_key_t key, const void *value)
765 *ovsthread_key_lookup__(key) = CONST_CAST(void *, value);
768 /* Returns the value of thread-specific data item 'key' in the current thread.
770 * This function is similar to pthread_getspecific(). */
772 ovsthread_getspecific(ovsthread_key_t key)
774 return *ovsthread_key_lookup__(key);