diff --git a/libc/intrin/stack.c b/libc/intrin/stack.c
index 09ee635b5e2..d1a1320a68f 100644
--- a/libc/intrin/stack.c
+++ b/libc/intrin/stack.c
@@ -64,7 +64,7 @@ static struct CosmoStacks cosmo_stacks = {
 };
 
 static struct CosmoStacksConfig cosmo_stacks_config = {
-    .maxstacks = 16,
+    .maxstacks = 3,
 };
 
 void cosmo_stack_lock(void) {
@@ -169,7 +169,7 @@ int cosmo_stack_getmaxstacks(void) {
  *
  * Please note this limit only applies to stacks that aren't in use.
  *
- * Your default is sixteen stacks may be cached at any given moment.
+ * Your default is three stacks may be cached at any given moment.
  *
  * If `maxstacks` is less than the current cache size, then surplus
  * entries will be evicted and freed before this function returns.
@@ -292,10 +292,10 @@ errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize,
     return EINVAL;
   if ((uintptr_t)stackaddr & (__gransize - 1))
     return EINVAL;
-  cosmo_once(&cosmo_stacks.once, cosmo_stack_setup);
   cosmo_stack_lock();
   struct Dll *surplus = 0;
   if (cosmo_stacks_config.maxstacks) {
+    cosmo_once(&cosmo_stacks.once, cosmo_stack_setup);
     surplus = cosmo_stack_decimate(cosmo_stacks_config.maxstacks - 1);
     struct CosmoStack *ts = 0;
     if (dll_is_empty(cosmo_stacks.objects))
diff --git a/libc/proc/fork.c b/libc/proc/fork.c
index 42c194d27f9..a90d2f5effd 100644
--- a/libc/proc/fork.c
+++ b/libc/proc/fork.c
@@ -114,6 +114,8 @@ static void fork_prepare(void) {
   fork_prepare_stdio();
   __localtime_lock();
   __dlopen_lock();
+  if (_weaken(cosmo_stack_lock))
+    _weaken(cosmo_stack_lock)();
   __cxa_lock();
   __gdtoa_lock1();
   __gdtoa_lock();
@@ -121,16 +123,12 @@ static void fork_prepare(void) {
   dlmalloc_pre_fork();
   __fds_lock();
   _pthread_mutex_lock(&__rand64_lock_obj);
-  if (_weaken(cosmo_stack_lock))
-    _weaken(cosmo_stack_lock)();
   __maps_lock();
   LOCKTRACE("READY TO LOCK AND ROLL");
 }
 
 static void fork_parent(void) {
   __maps_unlock();
-  if (_weaken(cosmo_stack_unlock))
-    _weaken(cosmo_stack_unlock)();
   _pthread_mutex_unlock(&__rand64_lock_obj);
   __fds_unlock();
   dlmalloc_post_fork_parent();
@@ -138,6 +136,8 @@ static void fork_parent(void) {
   __gdtoa_unlock();
   __gdtoa_unlock1();
   __cxa_unlock();
+  if (_weaken(cosmo_stack_unlock))
+    _weaken(cosmo_stack_unlock)();
   __dlopen_unlock();
   __localtime_unlock();
   fork_parent_stdio();
@@ -148,8 +148,6 @@ static void fork_parent(void) {
 
 static void fork_child(void) {
   nsync_mu_semaphore_sem_fork_child();
-  if (_weaken(cosmo_stack_wipe))
-    _weaken(cosmo_stack_wipe)();
   _pthread_mutex_wipe_np(&__dlopen_lock_obj);
   _pthread_mutex_wipe_np(&__rand64_lock_obj);
   _pthread_mutex_wipe_np(&__fds_lock_obj);
@@ -159,6 +157,8 @@ static void fork_child(void) {
   fork_child_stdio();
   _pthread_mutex_wipe_np(&__pthread_lock_obj);
   _pthread_mutex_wipe_np(&__cxa_lock_obj);
+  if (_weaken(cosmo_stack_wipe))
+    _weaken(cosmo_stack_wipe)();
   _pthread_mutex_wipe_np(&__localtime_lock_obj);
   if (IsWindows()) {
     // we don't bother locking the proc/itimer/sig locks above since
@@ -204,11 +204,11 @@ int _fork(uint32_t dwCreationFlags) {
     struct CosmoTib *tib = __get_tls();
     struct PosixThread *pt = (struct PosixThread *)tib->tib_pthread;
     tid = IsLinux() || IsXnuSilicon() ? dx : sys_gettid();
-    atomic_store_explicit(&tib->tib_tid, tid, memory_order_relaxed);
-    atomic_store_explicit(&pt->ptid, tid, memory_order_relaxed);
+    atomic_init(&tib->tib_tid, tid);
+    atomic_init(&pt->ptid, tid);
 
     // tracing and kisdangerous need this lock wiped a little earlier
-    atomic_store_explicit(&__maps.lock.word, 0, memory_order_relaxed);
+    atomic_init(&__maps.lock.word, 0);
 
     /*
      * it's now safe to call normal functions again
@@ -218,14 +218,10 @@ int _fork(uint32_t dwCreationFlags) {
     // we can't free() them since we're monopolizing all locks
     // we assume the operating system already reclaimed system handles
     dll_remove(&_pthread_list, &pt->list);
-    for (e = dll_first(_pthread_list); e; e = dll_next(_pthread_list, e)) {
-      atomic_store_explicit(&POSIXTHREAD_CONTAINER(e)->pt_status,
-                            kPosixThreadZombie, memory_order_relaxed);
-      atomic_store_explicit(&POSIXTHREAD_CONTAINER(e)->tib->tib_syshand, 0,
-                            memory_order_relaxed);
-    }
+    struct Dll *old_threads = _pthread_list;
+    _pthread_list = 0;
     dll_make_first(&_pthread_list, &pt->list);
-    atomic_store_explicit(&_pthread_count, 1, memory_order_relaxed);
+    atomic_init(&_pthread_count, 1);
 
     // get new system thread handle
     intptr_t syshand = 0;
@@ -236,16 +232,27 @@ int _fork(uint32_t dwCreationFlags) {
                       GetCurrentProcess(), &syshand, 0, false,
                       kNtDuplicateSameAccess);
     }
-    atomic_store_explicit(&tib->tib_syshand, syshand, memory_order_relaxed);
+    atomic_init(&tib->tib_syshand, syshand);
 
     // we can't be canceled if the canceler no longer exists
-    atomic_store_explicit(&pt->pt_canceled, false, memory_order_relaxed);
+    atomic_init(&pt->pt_canceled, false);
 
     // forget locks
     memset(tib->tib_locks, 0, sizeof(tib->tib_locks));
 
     // run user fork callbacks
     fork_child();
+
+    // free threads
+    if (_weaken(_pthread_free)) {
+      while ((e = dll_first(old_threads))) {
+        pt = POSIXTHREAD_CONTAINER(e);
+        atomic_init(&pt->tib->tib_syshand, 0);
+        dll_remove(&old_threads, e);
+        _weaken(_pthread_free)(pt);
+      }
+    }
+
     STRACE("fork() → 0 (child of %d; took %ld us)", parent, micros);
   } else {
     // this is the parent process
diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c
index 25b948a08d6..a3b35c690e9 100644
--- a/libc/runtime/clone.c
+++ b/libc/runtime/clone.c
@@ -535,7 +535,7 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
   wt = (struct CloneArgs *)sp;
   sp = AlignStack(sp, stk, stksz, 16);
   tid = atomic_fetch_add_explicit(&tids, 1, memory_order_acq_rel);
-  wt->this = tid = (tid & (kMaxThreadIds - 1)) + kMinThreadId;
+  wt->this = tid = (tid % kMaxThreadIds) + kMinThreadId;
   wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
   wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
   wt->tls = flags & CLONE_SETTLS ? tls : 0;
@@ -550,9 +550,9 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
   unassert(!__syslib->__pthread_attr_init(attr));
   unassert(!__syslib->__pthread_attr_setguardsize(attr, 0));
   unassert(!__syslib->__pthread_attr_setstacksize(attr, babystack));
-  if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt)) &&
-      (flags & CLONE_PARENT_SETTID)) {
-    *ptid = tid;
+  if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt))) {
+    if (flags & CLONE_PARENT_SETTID)
+      *ptid = tid;
     if (flags & CLONE_SETTLS) {
       struct CosmoTib *tib = tls;
       atomic_store_explicit(&tib[-1].tib_syshand, th, memory_order_release);
diff --git a/libc/thread/posixthread.internal.h b/libc/thread/posixthread.internal.h
index 09f1f9ae5e0..8468f43c28a 100644
--- a/libc/thread/posixthread.internal.h
+++ b/libc/thread/posixthread.internal.h
@@ -94,7 +94,7 @@ struct PosixThread {
 typedef void (*atfork_f)(void);
 
 extern struct Dll *_pthread_list;
-extern _Atomic(unsigned) _pthread_count;
+extern atomic_uint _pthread_count;
 extern struct PosixThread _pthread_static;
 extern _Atomic(pthread_key_dtor) _pthread_key_dtor[PTHREAD_KEYS_MAX];
 
@@ -109,6 +109,7 @@ int _pthread_tid(struct PosixThread *) libcesque;
 intptr_t _pthread_syshand(struct PosixThread *) libcesque;
 long _pthread_cancel_ack(void) libcesque;
 void _pthread_decimate(void) libcesque;
+void _pthread_free(struct PosixThread *) libcesque paramsnonnull();
 void _pthread_lock(void) libcesque;
 void _pthread_onfork_child(void) libcesque;
 void _pthread_onfork_parent(void) libcesque;
diff --git a/libc/thread/pthread_create.c b/libc/thread/pthread_create.c
index 351a18c8bc6..1207d03b603 100644
--- a/libc/thread/pthread_create.c
+++ b/libc/thread/pthread_create.c
@@ -67,7 +67,7 @@ __static_yoink("_pthread_onfork_prepare");
 __static_yoink("_pthread_onfork_parent");
 __static_yoink("_pthread_onfork_child");
 
-static void _pthread_free(struct PosixThread *pt) {
+void _pthread_free(struct PosixThread *pt) {
 
   // thread must be removed from _pthread_list before calling
   unassert(dll_is_alone(&pt->list) && &pt->list != _pthread_list);
@@ -84,7 +84,7 @@ static void _pthread_free(struct PosixThread *pt) {
   // free any additional upstream system resources
   // our fork implementation wipes this handle in child automatically
   uint64_t syshand =
-      atomic_load_explicit(&pt->tib->tib_syshand, memory_order_acquire);
+      atomic_load_explicit(&pt->tib->tib_syshand, memory_order_relaxed);
   if (syshand) {
     if (IsWindows())
       unassert(CloseHandle(syshand));  // non-inheritable
diff --git a/libc/thread/pthread_timedjoin_np.c b/libc/thread/pthread_timedjoin_np.c
index 142ae47342b..8cfe73282a7 100644
--- a/libc/thread/pthread_timedjoin_np.c
+++ b/libc/thread/pthread_timedjoin_np.c
@@ -62,33 +62,34 @@ static const char *DescribeReturnValue(char buf[30], int err, void **value) {
  * @cancelationpoint
  */
 static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
-  int x, e;
-  errno_t err = 0;
-  if (ctid == &__get_tls()->tib_tid) {
-    // "If an implementation detects that the value specified by the
-    // thread argument to pthread_join() refers to the calling thread,
-    // it is recommended that the function should fail and report an
-    // [EDEADLK] error." ──Quoth POSIX.1-2017
-    err = EDEADLK;
-  } else {
-    // "If the thread calling pthread_join() is canceled, then the target
-    // thread shall not be detached." ──Quoth POSIX.1-2017
-    if (!(err = pthread_testcancel_np())) {
-      BEGIN_CANCELATION_POINT;
-      while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
-        e = cosmo_futex_wait(ctid, x, !IsWindows() && !IsXnu(), CLOCK_REALTIME,
+
+  // "If an implementation detects that the value specified by the
+  // thread argument to pthread_join() refers to the calling thread,
+  // it is recommended that the function should fail and report an
+  // [EDEADLK] error." ──Quoth POSIX.1-2017
+  if (ctid == &__get_tls()->tib_tid)
+    return EDEADLK;
+
+  // "If the thread calling pthread_join() is canceled, then the target
+  // thread shall not be detached." ──Quoth POSIX.1-2017
+  errno_t err;
+  if ((err = pthread_testcancel_np()))
+    return err;
+
+  BEGIN_CANCELATION_POINT;
+  int x;
+  while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
+    int e = cosmo_futex_wait(ctid, x, !IsWindows() && !IsXnu(), CLOCK_REALTIME,
                              abstime);
-        if (e == -ECANCELED) {
-          err = ECANCELED;
-          break;
-        } else if (e == -ETIMEDOUT) {
-          err = EBUSY;
-          break;
-        }
-      }
-      END_CANCELATION_POINT;
+    if (e == -ECANCELED) {
+      err = ECANCELED;
+      break;
+    } else if (e == -ETIMEDOUT) {
+      err = EBUSY;
+      break;
     }
   }
+  END_CANCELATION_POINT;
   return err;
 }
 
@@ -117,12 +118,11 @@ static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
 errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
                              struct timespec *abstime) {
   int tid;
-  errno_t err = 0;
+  errno_t err;
   struct PosixThread *pt;
   enum PosixThreadStatus status;
   pt = (struct PosixThread *)thread;
   unassert(thread);
-  _pthread_ref(pt);
 
   // "The behavior is undefined if the value specified by the thread
   // argument to pthread_join() does not refer to a joinable thread."
@@ -135,14 +135,23 @@ errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
   // specifying the same target thread are undefined."
   // ──Quoth POSIX.1-2017
   if (!(err = _pthread_wait(&pt->tib->tib_tid, abstime))) {
-    atomic_store_explicit(&pt->pt_status, kPosixThreadZombie,
-                          memory_order_release);
-    _pthread_zombify(pt);
     if (value_ptr)
       *value_ptr = pt->pt_val;
+    if (atomic_load_explicit(&pt->pt_refs, memory_order_acquire)) {
+      _pthread_lock();
+      dll_remove(&_pthread_list, &pt->list);
+      dll_make_last(&_pthread_list, &pt->list);
+      atomic_store_explicit(&pt->pt_status, kPosixThreadZombie,
+                            memory_order_release);
+      _pthread_unlock();
+    } else {
+      _pthread_lock();
+      dll_remove(&_pthread_list, &pt->list);
+      _pthread_unlock();
+      _pthread_free(pt);
+    }
   }
-  _pthread_unref(pt);
 
   STRACE("pthread_timedjoin_np(%d, %s, %s) → %s", tid,
          DescribeReturnValue(alloca(30), err, value_ptr),
         DescribeTimespec(err ? -1 : 0, abstime), DescribeErrno(err));
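
Side note on the fork handler reordering above: fork_prepare(), fork_parent(), and fork_child() follow the usual atfork discipline, where every lock is acquired before fork() in a fixed order, released in reverse order in the parent, and reinitialized rather than unlocked in the child (which is left with a single thread). The sketch below illustrates that pattern in portable C using pthread_atfork(); the names g_cache_lock and cache_* are hypothetical, and cosmopolitan registers its handlers through its own __static_yoink'd callbacks rather than this API.

// Generic prepare/parent/child sketch (illustration only, not cosmo code).
#include <pthread.h>

static pthread_mutex_t g_cache_lock = PTHREAD_MUTEX_INITIALIZER;

static void cache_prepare(void) {
  // take the lock before fork() so no other thread holds it mid-operation
  pthread_mutex_lock(&g_cache_lock);
}

static void cache_parent(void) {
  // the parent keeps all its threads, so it simply releases the lock
  pthread_mutex_unlock(&g_cache_lock);
}

static void cache_child(void) {
  // the child reinitializes the lock instead of unlocking it, mirroring
  // the _pthread_mutex_wipe_np() calls in fork_child() above
  pthread_mutex_init(&g_cache_lock, 0);
}

__attribute__((constructor)) static void cache_atfork_setup(void) {
  pthread_atfork(cache_prepare, cache_parent, cache_child);
}

Moving cosmo_stack_lock() earlier in fork_prepare(), with cosmo_stack_unlock() correspondingly later in fork_parent(), keeps the acquisition order in prepare the exact mirror of the release order in the parent, so the lock hierarchy stays consistent.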