Skip to content

Commit

Permalink
Reduce pthread memory usage
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed Dec 24, 2024
1 parent ec2db4e commit 93e22c5
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 58 deletions.
6 changes: 3 additions & 3 deletions libc/intrin/stack.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ static struct CosmoStacks cosmo_stacks = {
};

static struct CosmoStacksConfig cosmo_stacks_config = {
.maxstacks = 16,
.maxstacks = 3,
};

void cosmo_stack_lock(void) {
Expand Down Expand Up @@ -169,7 +169,7 @@ int cosmo_stack_getmaxstacks(void) {
*
* Please note this limit only applies to stacks that aren't in use.
*
* By default, at most sixteen stacks may be cached at any given moment.
* By default, at most three stacks may be cached at any given moment.
*
* If `maxstacks` is less than the current cache size, then surplus
* entries will be evicted and freed before this function returns.
Expand Down Expand Up @@ -292,10 +292,10 @@ errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize,
return EINVAL;
if ((uintptr_t)stackaddr & (__gransize - 1))
return EINVAL;
cosmo_once(&cosmo_stacks.once, cosmo_stack_setup);
cosmo_stack_lock();
struct Dll *surplus = 0;
if (cosmo_stacks_config.maxstacks) {
cosmo_once(&cosmo_stacks.once, cosmo_stack_setup);
surplus = cosmo_stack_decimate(cosmo_stacks_config.maxstacks - 1);
struct CosmoStack *ts = 0;
if (dll_is_empty(cosmo_stacks.objects))
Expand Down
43 changes: 25 additions & 18 deletions libc/proc/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,30 +114,30 @@ static void fork_prepare(void) {
fork_prepare_stdio();
__localtime_lock();
__dlopen_lock();
if (_weaken(cosmo_stack_lock))
_weaken(cosmo_stack_lock)();
__cxa_lock();
__gdtoa_lock1();
__gdtoa_lock();
_pthread_lock();
dlmalloc_pre_fork();
__fds_lock();
_pthread_mutex_lock(&__rand64_lock_obj);
if (_weaken(cosmo_stack_lock))
_weaken(cosmo_stack_lock)();
__maps_lock();
LOCKTRACE("READY TO LOCK AND ROLL");
}

static void fork_parent(void) {
__maps_unlock();
if (_weaken(cosmo_stack_unlock))
_weaken(cosmo_stack_unlock)();
_pthread_mutex_unlock(&__rand64_lock_obj);
__fds_unlock();
dlmalloc_post_fork_parent();
_pthread_unlock();
__gdtoa_unlock();
__gdtoa_unlock1();
__cxa_unlock();
if (_weaken(cosmo_stack_unlock))
_weaken(cosmo_stack_unlock)();
__dlopen_unlock();
__localtime_unlock();
fork_parent_stdio();
Expand All @@ -148,8 +148,6 @@ static void fork_parent(void) {

static void fork_child(void) {
nsync_mu_semaphore_sem_fork_child();
if (_weaken(cosmo_stack_wipe))
_weaken(cosmo_stack_wipe)();
_pthread_mutex_wipe_np(&__dlopen_lock_obj);
_pthread_mutex_wipe_np(&__rand64_lock_obj);
_pthread_mutex_wipe_np(&__fds_lock_obj);
Expand All @@ -159,6 +157,8 @@ static void fork_child(void) {
fork_child_stdio();
_pthread_mutex_wipe_np(&__pthread_lock_obj);
_pthread_mutex_wipe_np(&__cxa_lock_obj);
if (_weaken(cosmo_stack_wipe))
_weaken(cosmo_stack_wipe)();
_pthread_mutex_wipe_np(&__localtime_lock_obj);
if (IsWindows()) {
// we don't bother locking the proc/itimer/sig locks above since
Expand Down Expand Up @@ -204,11 +204,11 @@ int _fork(uint32_t dwCreationFlags) {
struct CosmoTib *tib = __get_tls();
struct PosixThread *pt = (struct PosixThread *)tib->tib_pthread;
tid = IsLinux() || IsXnuSilicon() ? dx : sys_gettid();
atomic_store_explicit(&tib->tib_tid, tid, memory_order_relaxed);
atomic_store_explicit(&pt->ptid, tid, memory_order_relaxed);
atomic_init(&tib->tib_tid, tid);
atomic_init(&pt->ptid, tid);

// tracing and kisdangerous need this lock wiped a little earlier
atomic_store_explicit(&__maps.lock.word, 0, memory_order_relaxed);
atomic_init(&__maps.lock.word, 0);

/*
* it's now safe to call normal functions again
Expand All @@ -218,14 +218,10 @@ int _fork(uint32_t dwCreationFlags) {
// we can't free() them since we're monopolizing all locks
// we assume the operating system already reclaimed system handles
dll_remove(&_pthread_list, &pt->list);
for (e = dll_first(_pthread_list); e; e = dll_next(_pthread_list, e)) {
atomic_store_explicit(&POSIXTHREAD_CONTAINER(e)->pt_status,
kPosixThreadZombie, memory_order_relaxed);
atomic_store_explicit(&POSIXTHREAD_CONTAINER(e)->tib->tib_syshand, 0,
memory_order_relaxed);
}
struct Dll *old_threads = _pthread_list;
_pthread_list = 0;
dll_make_first(&_pthread_list, &pt->list);
atomic_store_explicit(&_pthread_count, 1, memory_order_relaxed);
atomic_init(&_pthread_count, 1);

// get new system thread handle
intptr_t syshand = 0;
Expand All @@ -236,16 +232,27 @@ int _fork(uint32_t dwCreationFlags) {
GetCurrentProcess(), &syshand, 0, false,
kNtDuplicateSameAccess);
}
atomic_store_explicit(&tib->tib_syshand, syshand, memory_order_relaxed);
atomic_init(&tib->tib_syshand, syshand);

// we can't be canceled if the canceler no longer exists
atomic_store_explicit(&pt->pt_canceled, false, memory_order_relaxed);
atomic_init(&pt->pt_canceled, false);

// forget locks
memset(tib->tib_locks, 0, sizeof(tib->tib_locks));

// run user fork callbacks
fork_child();

// free threads
if (_weaken(_pthread_free)) {
while ((e = dll_first(old_threads))) {
pt = POSIXTHREAD_CONTAINER(e);
atomic_init(&pt->tib->tib_syshand, 0);
dll_remove(&old_threads, e);
_weaken(_pthread_free)(pt);
}
}

STRACE("fork() → 0 (child of %d; took %ld us)", parent, micros);
} else {
// this is the parent process
Expand Down
8 changes: 4 additions & 4 deletions libc/runtime/clone.c
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
wt = (struct CloneArgs *)sp;
sp = AlignStack(sp, stk, stksz, 16);
tid = atomic_fetch_add_explicit(&tids, 1, memory_order_acq_rel);
wt->this = tid = (tid & (kMaxThreadIds - 1)) + kMinThreadId;
wt->this = tid = (tid % kMaxThreadIds) + kMinThreadId;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = flags & CLONE_SETTLS ? tls : 0;
Expand All @@ -550,9 +550,9 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
unassert(!__syslib->__pthread_attr_init(attr));
unassert(!__syslib->__pthread_attr_setguardsize(attr, 0));
unassert(!__syslib->__pthread_attr_setstacksize(attr, babystack));
if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt)) &&
(flags & CLONE_PARENT_SETTID)) {
*ptid = tid;
if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt))) {
if (flags & CLONE_PARENT_SETTID)
*ptid = tid;
if (flags & CLONE_SETTLS) {
struct CosmoTib *tib = tls;
atomic_store_explicit(&tib[-1].tib_syshand, th, memory_order_release);
Expand Down
3 changes: 2 additions & 1 deletion libc/thread/posixthread.internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ struct PosixThread {
typedef void (*atfork_f)(void);

extern struct Dll *_pthread_list;
extern _Atomic(unsigned) _pthread_count;
extern atomic_uint _pthread_count;
extern struct PosixThread _pthread_static;
extern _Atomic(pthread_key_dtor) _pthread_key_dtor[PTHREAD_KEYS_MAX];

Expand All @@ -109,6 +109,7 @@ int _pthread_tid(struct PosixThread *) libcesque;
intptr_t _pthread_syshand(struct PosixThread *) libcesque;
long _pthread_cancel_ack(void) libcesque;
void _pthread_decimate(void) libcesque;
void _pthread_free(struct PosixThread *) libcesque paramsnonnull();
void _pthread_lock(void) libcesque;
void _pthread_onfork_child(void) libcesque;
void _pthread_onfork_parent(void) libcesque;
Expand Down
4 changes: 2 additions & 2 deletions libc/thread/pthread_create.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ __static_yoink("_pthread_onfork_prepare");
__static_yoink("_pthread_onfork_parent");
__static_yoink("_pthread_onfork_child");

static void _pthread_free(struct PosixThread *pt) {
void _pthread_free(struct PosixThread *pt) {

// thread must be removed from _pthread_list before calling
unassert(dll_is_alone(&pt->list) && &pt->list != _pthread_list);
Expand All @@ -84,7 +84,7 @@ static void _pthread_free(struct PosixThread *pt) {
// free any additional upstream system resources
// our fork implementation wipes this handle in child automatically
uint64_t syshand =
atomic_load_explicit(&pt->tib->tib_syshand, memory_order_acquire);
atomic_load_explicit(&pt->tib->tib_syshand, memory_order_relaxed);
if (syshand) {
if (IsWindows())
unassert(CloseHandle(syshand)); // non-inheritable
Expand Down
69 changes: 39 additions & 30 deletions libc/thread/pthread_timedjoin_np.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,33 +62,34 @@ static const char *DescribeReturnValue(char buf[30], int err, void **value) {
* @cancelationpoint
*/
static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
int x, e;
errno_t err = 0;
if (ctid == &__get_tls()->tib_tid) {
// "If an implementation detects that the value specified by the
// thread argument to pthread_join() refers to the calling thread,
// it is recommended that the function should fail and report an
// [EDEADLK] error." ──Quoth POSIX.1-2017
err = EDEADLK;
} else {
// "If the thread calling pthread_join() is canceled, then the target
// thread shall not be detached." ──Quoth POSIX.1-2017
if (!(err = pthread_testcancel_np())) {
BEGIN_CANCELATION_POINT;
while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
e = cosmo_futex_wait(ctid, x, !IsWindows() && !IsXnu(), CLOCK_REALTIME,

// "If an implementation detects that the value specified by the
// thread argument to pthread_join() refers to the calling thread,
// it is recommended that the function should fail and report an
// [EDEADLK] error." ──Quoth POSIX.1-2017
if (ctid == &__get_tls()->tib_tid)
return EDEADLK;

// "If the thread calling pthread_join() is canceled, then the target
// thread shall not be detached." ──Quoth POSIX.1-2017
errno_t err;
if ((err = pthread_testcancel_np()))
return err;

BEGIN_CANCELATION_POINT;
int x;
while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
int e = cosmo_futex_wait(ctid, x, !IsWindows() && !IsXnu(), CLOCK_REALTIME,
abstime);
if (e == -ECANCELED) {
err = ECANCELED;
break;
} else if (e == -ETIMEDOUT) {
err = EBUSY;
break;
}
}
END_CANCELATION_POINT;
if (e == -ECANCELED) {
err = ECANCELED;
break;
} else if (e == -ETIMEDOUT) {
err = EBUSY;
break;
}
}
END_CANCELATION_POINT;
return err;
}

Expand Down Expand Up @@ -117,12 +118,11 @@ static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
struct timespec *abstime) {
int tid;
errno_t err = 0;
errno_t err;
struct PosixThread *pt;
enum PosixThreadStatus status;
pt = (struct PosixThread *)thread;
unassert(thread);
_pthread_ref(pt);

// "The behavior is undefined if the value specified by the thread
// argument to pthread_join() does not refer to a joinable thread."
Expand All @@ -135,14 +135,23 @@ errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
// specifying the same target thread are undefined."
// ──Quoth POSIX.1-2017
if (!(err = _pthread_wait(&pt->tib->tib_tid, abstime))) {
atomic_store_explicit(&pt->pt_status, kPosixThreadZombie,
memory_order_release);
_pthread_zombify(pt);
if (value_ptr)
*value_ptr = pt->pt_val;
if (atomic_load_explicit(&pt->pt_refs, memory_order_acquire)) {
_pthread_lock();
dll_remove(&_pthread_list, &pt->list);
dll_make_last(&_pthread_list, &pt->list);
atomic_store_explicit(&pt->pt_status, kPosixThreadZombie,
memory_order_release);
_pthread_unlock();
} else {
_pthread_lock();
dll_remove(&_pthread_list, &pt->list);
_pthread_unlock();
_pthread_free(pt);
}
}

_pthread_unref(pt);
STRACE("pthread_timedjoin_np(%d, %s, %s) → %s", tid,
DescribeReturnValue(alloca(30), err, value_ptr),
DescribeTimespec(err ? -1 : 0, abstime), DescribeErrno(err));
Expand Down

0 comments on commit 93e22c5

Please sign in to comment.