From 695f74035d262b0f17b434c2bb35430413b9270a Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 3 Oct 2023 16:58:42 -0700 Subject: [PATCH] Use CLK_TCK for clock_nanosleep() spin threshold This more accurately reflects how the kernels actually implement this function and it most importantly avoids incurring startup latency. --- libc/{runtime => calls}/clktck.c | 0 libc/calls/clock_nanosleep.c | 75 ++++++++--------------------- libc/nexgen32e/yield.h | 16 ++++++ tool/viz/clock_nanosleep_accuracy.c | 72 +++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 55 deletions(-) rename libc/{runtime => calls}/clktck.c (100%) create mode 100644 libc/nexgen32e/yield.h create mode 100644 tool/viz/clock_nanosleep_accuracy.c diff --git a/libc/runtime/clktck.c b/libc/calls/clktck.c similarity index 100% rename from libc/runtime/clktck.c rename to libc/calls/clktck.c diff --git a/libc/calls/clock_nanosleep.c b/libc/calls/clock_nanosleep.c index cf4900e8cbc..b8ff850f402 100644 --- a/libc/calls/clock_nanosleep.c +++ b/libc/calls/clock_nanosleep.c @@ -17,31 +17,20 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" -#include "libc/calls/asan.internal.h" -#include "libc/calls/blockcancel.internal.h" -#include "libc/calls/blocksigs.internal.h" -#include "libc/calls/calls.h" #include "libc/calls/cp.internal.h" -#include "libc/calls/state.internal.h" -#include "libc/calls/struct/timespec.h" #include "libc/calls/struct/timespec.internal.h" -#include "libc/calls/struct/timeval.h" -#include "libc/calls/struct/timeval.internal.h" #include "libc/dce.h" #include "libc/errno.h" #include "libc/intrin/describeflags.internal.h" #include "libc/intrin/strace.internal.h" #include "libc/intrin/weaken.h" -#include "libc/macros.internal.h" -#include "libc/nt/ntdll.h" +#include "libc/nexgen32e/yield.h" +#include "libc/runtime/clktck.h" #include "libc/str/str.h" #include "libc/sysv/consts/clock.h" #include "libc/sysv/consts/timer.h" #include "libc/sysv/errfuns.h" #include "libc/thread/thread.h" -#include "libc/thread/tls.h" - -static int64_t g_nanosleep_latency; static errno_t sys_clock_nanosleep(int clock, int flags, const struct timespec *req, @@ -65,33 +54,21 @@ static errno_t sys_clock_nanosleep(int clock, int flags, errno = e; } END_CANCELLATION_POINT; +#if 0 + STRACE("sys_clock_nanosleep(%s, %s, %s, [%s]) → %d% m", + DescribeClockName(clock), DescribeSleepFlags(flags), + DescribeTimespec(0, req), DescribeTimespec(rc, rem), rc); +#endif return rc; } -// determine sched_yield() vs. 
clock_nanosleep() threshold -// 1ns sys_clock_nanosleep() on Windows takes milliseconds :'( -// 1ns sys_clock_nanosleep() on Linux/FreeBSD takes tens of microseconds -// 1ns sys_clock_nanosleep() on OpenBSD/NetBSD takes tens of milliseconds D: -static struct timespec GetNanosleepLatency(void) { - errno_t rc; - int64_t nanos; - struct timespec x, y, w = {0, 1}; - if (!(nanos = g_nanosleep_latency)) { - BLOCK_SIGNALS; - for (;;) { - unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &x)); - rc = sys_clock_nanosleep(CLOCK_REALTIME, 0, &w, 0); - unassert(!rc || rc == EINTR); - if (!rc) { - unassert(!clock_gettime(CLOCK_REALTIME_PRECISE, &y)); - nanos = timespec_tonanos(timespec_sub(y, x)); - g_nanosleep_latency = nanos; - break; - } - } - ALLOW_SIGNALS; - } - return timespec_fromnanos(nanos); +// determine how many nanoseconds it takes before clock_nanosleep() +// starts sleeping with 90 percent accuracy; in other words when we +// ask it to sleep 1 second, it (a) must NEVER sleep for less time, +// and (b) does not sleep for longer than 1.1 seconds of time. 
whatever +// is below that, thanks but no thanks, we'll just spin yield. +static struct timespec GetNanosleepThreshold(void) { + return timespec_fromnanos(1000000000 / CLK_TCK); } static errno_t CheckCancel(void) { @@ -114,7 +91,7 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req, } unassert(!clock_gettime(CLOCK_REALTIME, &start)); for (;;) { - pthread_yield(); + spin_yield(); unassert(!clock_gettime(CLOCK_REALTIME, &now)); if (flags & TIMER_ABSTIME) { if (timespec_cmp(now, *req) >= 0) { @@ -143,19 +120,13 @@ static errno_t SpinNanosleep(int clock, int flags, const struct timespec *req, } } +// clock_gettime() takes a few nanoseconds but sys_clock_nanosleep() +// is incapable of sleeping for less than a millisecond on platforms +// such as windows and it's not much prettier on unix systems either static bool ShouldUseSpinNanosleep(int clock, int flags, const struct timespec *req) { errno_t e; struct timespec now; - if (IsWindows()) { - // Our spin technique here is intended to take advantage of the fact - // that sched_yield() takes about a hundred nanoseconds. But Windows - // SleepEx(0, 0) a.k.a. NtYieldExecution() takes a whole millisecond - // and it matters not whether our intent is to yielding or sleeping, - // since we use the SleepEx() function to implement both. Therefore, - // there's no reason to use SpinNanosleep() on Windows. - return false; - } if (clock != CLOCK_REALTIME && // clock != CLOCK_REALTIME_PRECISE && // clock != CLOCK_MONOTONIC && // @@ -164,13 +135,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags, return false; } if (!flags) { - return timespec_cmp(*req, GetNanosleepLatency()) < 0; - } - // We need a clock_gettime() system call to perform this check if the - // sleep request is an absolute timestamp. So we avoid doing that on - // systems where sleep latency isn't too outrageous.
- if (timespec_cmp(GetNanosleepLatency(), timespec_fromnanos(50 * 1000)) < 0) { - return false; + return timespec_cmp(*req, GetNanosleepThreshold()) < 0; } e = errno; if (clock_gettime(clock, &now)) { @@ -179,7 +144,7 @@ static bool ShouldUseSpinNanosleep(int clock, int flags, return false; } return timespec_cmp(*req, now) < 0 || - timespec_cmp(timespec_sub(*req, now), GetNanosleepLatency()) < 0; + timespec_cmp(timespec_sub(*req, now), GetNanosleepThreshold()) < 0; } /** diff --git a/libc/nexgen32e/yield.h b/libc/nexgen32e/yield.h new file mode 100644 index 00000000000..221b70ece7e --- /dev/null +++ b/libc/nexgen32e/yield.h @@ -0,0 +1,16 @@ +#ifndef COSMOPOLITAN_LIBC_YIELD_H_ +#define COSMOPOLITAN_LIBC_YIELD_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +#ifdef _COSMO_SOURCE + +static inline void spin_yield(void) { +#if defined(__GNUC__) && defined(__aarch64__) + __asm__ volatile("yield"); +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) + __asm__ volatile("pause"); +#endif +} + +#endif /* _COSMO_SOURCE */ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_YIELD_H_ */ diff --git a/tool/viz/clock_nanosleep_accuracy.c b/tool/viz/clock_nanosleep_accuracy.c new file mode 100644 index 00000000000..b7dbae45770 --- /dev/null +++ b/tool/viz/clock_nanosleep_accuracy.c @@ -0,0 +1,72 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. 
│ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" +#include "libc/calls/struct/timespec.h" +#include "libc/intrin/kprintf.h" +#include "libc/runtime/runtime.h" +#include "libc/stdio/stdio.h" +#include "libc/sysv/consts/clock.h" + +#define MAXIMUM 1e8 +#define ITERATIONS 10 + +void WarmUp(void) { + struct timespec wf = {0, 1}; + npassert(!clock_nanosleep(CLOCK_REALTIME, 0, &wf, 0)); +} + +void TestSleepRealRelative(void) { + printf("\n"); + printf("testing: clock_nanosleep(CLOCK_REALTIME) with relative timeout\n"); + for (long nanos = 1; nanos < (long)MAXIMUM; nanos *= 2) { + struct timespec t1, t2, wf; + wf = timespec_fromnanos(nanos); + clock_gettime(CLOCK_REALTIME_PRECISE, &t1); + for (int i = 0; i < ITERATIONS; ++i) { + npassert(!clock_nanosleep(CLOCK_REALTIME, 0, &wf, 0)); + } + clock_gettime(CLOCK_REALTIME_PRECISE, &t2); + long took = timespec_tonanos(timespec_sub(t2, t1)) / ITERATIONS; + printf("%,11ld ns sleep took %,11ld ns delta %,11ld ns\n", nanos, took, + took - nanos); + } +} + +void TestSleepMonoRelative(void) { + printf("\n"); + printf("testing: clock_nanosleep(CLOCK_MONOTONIC) with relative timeout\n"); + for (long nanos = 1; nanos < (long)MAXIMUM; nanos *= 2) { + struct timespec t1, t2, wf; + wf = timespec_fromnanos(nanos); + clock_gettime(CLOCK_REALTIME_PRECISE, &t1); + for (int i = 0; i < ITERATIONS; ++i) { + npassert(!clock_nanosleep(CLOCK_MONOTONIC, 0, &wf, 0)); + } 
+ clock_gettime(CLOCK_REALTIME_PRECISE, &t2); + long took = timespec_tonanos(timespec_sub(t2, t1)) / ITERATIONS; + printf("%,11ld ns sleep took %,11ld ns delta %,11ld ns\n", nanos, took, + took - nanos); + } +} + +int main(int argc, char *argv[]) { + WarmUp(); + TestSleepRealRelative(); + TestSleepMonoRelative(); +}