From 96055be9904751252595b3dd061c3783146f69bf Mon Sep 17 00:00:00 2001 From: Nick Gasson Date: Fri, 8 Mar 2024 16:59:35 +0000 Subject: [PATCH] Improve locking benchmark --- src/thread.c | 17 ++++++++--- test/lockbench.c | 76 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 70 insertions(+), 23 deletions(-) diff --git a/src/thread.c b/src/thread.c index 8785415f..dfd4f304 100644 --- a/src/thread.c +++ b/src/thread.c @@ -50,7 +50,7 @@ #undef task_t #endif -#define LOCK_SPINS 15 +#define LOCK_SPINS 64 #define YIELD_SPINS 32 #define MIN_TAKE 8 #define PARKING_BAYS 64 @@ -104,6 +104,10 @@ STATIC_ASSERT(sizeof(lock_stats_t) == 64) #define TSAN_POST_UNLOCK(addr) #endif +#ifndef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +#define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP PTHREAD_MUTEX_INITIALIZER +#endif + #define PTHREAD_CHECK(op, ...) do { \ if (unlikely(op(__VA_ARGS__) != 0)) \ fatal_errno(#op); \ @@ -246,7 +250,7 @@ typedef struct _barrier { static parking_bay_t parking_bays[PARKING_BAYS] = { #ifndef __MINGW32__ [0 ... 
PARKING_BAYS - 1] = { - PTHREAD_MUTEX_INITIALIZER, + PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, PTHREAD_COND_INITIALIZER } #endif @@ -268,7 +272,7 @@ static CONDITION_VARIABLE wake_workers = CONDITION_VARIABLE_INIT; static CRITICAL_SECTION wakelock; #else static pthread_cond_t wake_workers = PTHREAD_COND_INITIALIZER; -static pthread_mutex_t wakelock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t wakelock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP; #endif #ifdef POSIX_SUSPEND @@ -398,6 +402,11 @@ static void join_worker_threads(void) } assert(atomic_load(&running_threads) == 1); + +#ifdef DEBUG + for (int i = 0; i < PARKING_BAYS; i++) + assert(parking_bays[i].parked == 0); +#endif } static nvc_thread_t *thread_new(thread_fn_t fn, void *arg, @@ -712,7 +721,7 @@ void nvc_lock(nvc_lock_t *lock) spins++, state = relaxed_load(lock)) spin_wait(); - if (spins == LOCK_SPINS) { + if (state & IS_LOCKED) { // Ignore failures here as we will check the lock state again // in the callback with the park mutex held atomic_cas(lock, IS_LOCKED, IS_LOCKED | HAS_PARKED); diff --git a/test/lockbench.c b/test/lockbench.c index 965c2f57..8643495c 100644 --- a/test/lockbench.c +++ b/test/lockbench.c @@ -22,17 +22,30 @@ #include #include #include -#include #include #include +#include +#include + +#ifdef __MINGW32__ +#define WIN32_LEAN_AND_MEAN +#include +#else +#include +#endif -#define USE_NVC_LOCK +#define USE_NVC_LOCK 1 -#ifdef USE_NVC_LOCK +#if USE_NVC_LOCK #define LOCK_TYPE nvc_lock_t #define LOCK_INIT(m) m = 0 #define LOCK(m) nvc_lock(&m); #define UNLOCK(m) nvc_unlock(&m); +#elif defined __MINGW32__ +#define LOCK_TYPE CRITICAL_SECTION +#define LOCK_INIT(m) InitializeCriticalSectionAndSpinCount(&m, 10); +#define LOCK(m) EnterCriticalSection(&m) +#define UNLOCK(m) LeaveCriticalSection(&m) #else #define LOCK_TYPE pthread_mutex_t #define LOCK_INIT(m) pthread_mutex_init(&m, NULL) @@ -49,29 +62,39 @@ typedef struct { static counter_t counter[N_COUNTERS]; -typedef struct 
__attribute__((aligned(64))) { +STATIC_ASSERT(sizeof(counter) == N_COUNTERS * 64); + +typedef struct { nvc_thread_t *thread; - int iters; - int last; - int running; -} thread_state_t; + uint32_t rng; + uint64_t iters; + uint64_t last; + int running; +} __attribute__((aligned(64))) thread_state_t; STATIC_ASSERT(sizeof(thread_state_t) == 64); +static inline uint32_t fast_rand(thread_state_t *t) +{ + uint32_t state = t->rng; + state ^= (state << 13); + state ^= (state >> 17); + state ^= (state << 5); + return (t->rng = state); +} + static void *worker_thread(void *arg) { thread_state_t *t = arg; while (relaxed_load(&(t->running))) { - int iters = relaxed_load(&(t->iters)); - - int n = rand() % N_COUNTERS; + int n = fast_rand(t) % N_COUNTERS; LOCK(counter[n].lock); counter[n].value++; UNLOCK(counter[n].lock); - relaxed_store(&(t->iters), iters + 1); + relaxed_add(&(t->iters), 1); } return NULL; @@ -95,13 +118,21 @@ int main(int argc, char **argv) for (int i = 0; i < nproc; i++) { threads[i].running = 1; + threads[i].rng = rand() | 1; /* xorshift state must be non-zero or it sticks at 0 */ threads[i].thread = thread_create(worker_thread, &(threads[i]), "worker %d", i); } + printf("THREADS OPS/MS FAIRNESS\n"); + for (int i = 0; i < 10; i++) { + const uint64_t start = get_timestamp_ns(); + sleep(1); + const uint64_t end = get_timestamp_ns(); + const double secs = (end - start) / 1.0e9; + int total = 0; for (int j = 0; j < N_COUNTERS; j++) { LOCK(counter[j].lock); @@ -110,17 +141,19 @@ int main(int argc, char **argv) UNLOCK(counter[j].lock); } - int min = INT_MAX, max = INT_MIN; + double log_sum = 0.0, best = 0.0; /* sum of logs: a running product overflows double with many threads */ for (int j = 0; j < nproc; j++) { - int iters = relaxed_load(&(threads[j].iters)); - int delta = iters - threads[j].last; - min = MIN(min, delta); - max = MAX(max, delta); + uint64_t iters = relaxed_load(&(threads[j].iters)); + uint64_t delta = iters - threads[j].last; + const double result = (double)delta / secs; + log_sum += log(result); /* log(0) is -inf, so a starved thread still yields fairness 0 */ + if (result > best) + best = result; threads[j].last = iters; } - printf("%d threads; avg:%d 
min:%d max:%d\n", - nproc, total / nproc, min, max); + const double fair = exp(log_sum / nproc) / best; /* geometric mean of per-thread rates over the best rate */ + printf("%-8d %-11.1f %.2f\n", nproc, (total / secs) / 1000.0, fair); } for (int i = 0; i < nproc; i++) @@ -129,5 +162,10 @@ int main(int argc, char **argv) for (int i = 0; i < nproc; i++) thread_join(threads[i].thread); +#if USE_NVC_LOCK + for (int i = 0; i < N_COUNTERS; i++) + assert(counter[i].lock == 0); +#endif + return 0; } -- 2.39.2