Skip to content

Commit

Permalink
Update nanotime and remove executable permission on nanotime.c
Browse files Browse the repository at this point in the history
  • Loading branch information
nightmareci committed Jul 30, 2024
1 parent b6ee433 commit eb0473b
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 23 deletions.
Empty file modified src/main_sdl/nanotime.c
100755 → 100644
Empty file.
91 changes: 68 additions & 23 deletions src/main_sdl/nanotime.h
Original file line number Diff line number Diff line change
Expand Up @@ -708,21 +708,49 @@ bool nanotime_step(nanotime_step_data* const stepper) {
const uint64_t shift = UINT64_C(4);

/*
* A big initial sleep lowers power usage on any platform, as more
* small sleep requests use more power than one bigger, equivalent
* sleep request. But we only want to do a big initial sleep if the
* target step sleep duration is "big enough"; in practice, 4ms is a
* good choice for the minimum "big enough" duration.
* The algorithm implemented here takes the assumption that a
* sequence of repeated sleep requests of the same requested
* duration end up being approximately of equal actual sleep
* duration, even if they're all well above the requested
* duration. In practice, such an assumption proves out to be
* true on various platforms.
*/
const uint64_t min_initial = NANOTIME_NSEC_PER_SEC / UINT64_C(250);
if (current_sleep_duration > min_initial) {
stepper->sleep(current_sleep_duration - min_initial);
const uint64_t initial_end_point = stepper->now();
const uint64_t initial_duration = nanotime_interval(start_point, initial_end_point, stepper->now_max);
if (initial_duration >= current_sleep_duration) {

/*
* A big initial sleep lowers power usage on any platform, as
* more small sleep requests use more power than fewer bigger,
* equivalent sleep requests. In practice, operating systems
* "actually sleep" when 1ms or more is requested, and 1ms is
* the minimum request duration you can make on some platforms
* (like older versions of Windows). Additionally, power usage
* is nice and low when doing the number of 1ms sleeps that's
* (hopefully) short of the target duration.
*
* But, the loop here maintains a maximum of the actual slept
* durations, breaking out when the time remaining is greater
* than or equal to the maximum found. By breaking out on the
* maximum found rather than just 1ms-or-less remaining,
* sleeping beyond the target deadline is reduced.
*/
{
uint64_t max = NANOTIME_NSEC_PER_SEC / UINT64_C(1000);
uint64_t start = stepper->now();
while (nanotime_interval(stepper->sleep_point, start, stepper->now_max) + max < total_sleep_duration) {
stepper->sleep(NANOTIME_NSEC_PER_SEC / UINT64_C(1000));
const uint64_t next = stepper->now();
const uint64_t current_interval = nanotime_interval(start, next, stepper->now_max);
if (current_interval > max) {
max = current_interval;
}
start = next;
}
const uint64_t initial_duration = nanotime_interval(start_point, stepper->now(), stepper->now_max);
if (initial_duration < current_sleep_duration) {
current_sleep_duration -= initial_duration;
}
else {
goto step_end;
}
current_sleep_duration -= initial_duration;
}

/*
Expand All @@ -732,15 +760,11 @@ bool nanotime_step(nanotime_step_data* const stepper) {
* to precisely sleep up to the deadline below this loop. The
* divisor is larger than two though, as it produces better
* behavior, and seems to work fine in testing on real
* hardware. This loop, and the one below, take the assumption
* that sleep requests of the same amount are roughly equal; by
* keeping track of the max of all the sleeps, the loops can be
* broken out of when the remaining time is less than the max,
* allowing the loop after this one to do shorter sleeps, with
* a corresponding smaller max of the sleeps. The overshoot
* possible in the loop below this one won't overshoot much, or
* in the best case won't overshoot so the busyloop can finish
* up the sleep precisely.
* hardware. The same method of keeping track of the max
* duration per loop of same sleep request durations above is
* used here. The overshoot possible in the loop below this one
* won't overshoot much, or in the best case won't overshoot,
* so the busyloop can finish up the sleep precisely.
*/
current_sleep_duration >>= shift;
for (
Expand All @@ -758,16 +782,25 @@ bool nanotime_step(nanotime_step_data* const stepper) {
}
}
}
if (nanotime_interval(stepper->sleep_point, stepper->now(), stepper->now_max) >= total_sleep_duration) {
goto step_end;
}

{
/*
* After (hopefully) stopping short of the deadline by
* a small amount, do small sleeps here to get closer
* to the deadline, but again attempting to stop short
* by an even smaller amount. It's best to do larger
* sleeps as done in the above loop, to reduce
* sleeps as done in the above loops, to reduce
* CPU/power usage, as each sleep iteration has a
* CPU/power usage cost.
* more-or-less fixed overhead of CPU/power usage.
*
* In testing on an M1 Mac mini running macOS, power
* usage is lower using zero-duration sleeps vs.
* nanotime_yield(), with no loss of timing precision.
* The same might be true for other hardwares/operating
* systems.
*/
uint64_t max = stepper->zero_sleep_duration;
uint64_t start;
Expand All @@ -787,6 +820,18 @@ bool nanotime_step(nanotime_step_data* const stepper) {
* reduce the remaining time to sleep to a minimum via
* process-yielding sleeps, so the amount of time spent
* spinning here is hopefully quite low.
*
* In testing on an M1 Mac mini running macOS,
* busylooping here produces the absolute greatest
* precision possible on the hardware, down to the
* sub-10ns-off-per-update range for longish stretches
* during 60 Hz updates, but in the
* hundreds-to-thousands of nanoseconds off when using
* nanotime_yield() or zero-duration sleeps. And,
* because the sleeping algorithm above does such a
* good job of stopping very close to the deadline,
* busylooping here has basically negligible difference
* in power usage vs. yields/zero-duration sleeps.
*/
uint64_t current_time;
uint64_t accumulated;
Expand Down

0 comments on commit eb0473b

Please sign in to comment.