Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Propagate graceful shutdown #76

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions inc/class-runner.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,25 @@ class Runner {
public function __construct( $options = [] ) {
$defaults = [
'max_workers' => 4,

// After receiving a SIGTERM, delay until we propagate SIGTERM to
// workers. This will kill any jobs which aren't specifically
// designed to catch and ignore it, so should be set to a
// reasonable value for general WordPress jobs.
// (Delay in seconds, or false to disable.)
'graceful_shutdown_timeout' => 30,

// After sending a SIGTERM, delay until we send a SIGKILL to
// force-shutdown any workers. This should be set to a higher
// value than graceful_shutdown_timeout.
//
// Delay is specified as *total* time after Cavalcade-Runner
// receives the SIGTERM from the system.
// (Workers will have `force_shutdown_timeout - graceful_shutdown_timeout`
// seconds to shut down gracefully.)
//
// (Delay in seconds, or false to disable.)
'force_shutdown_timeout' => 90,
];
$this->options = array_merge( $defaults, $options );
$this->hooks = new Hooks();
Expand Down Expand Up @@ -147,6 +166,8 @@ public function run() {
}

public function terminate( $signal ) {
$received_at = microtime( true );

/**
* Action before terminating workers.
*
Expand All @@ -158,8 +179,38 @@ public function terminate( $signal ) {

printf( 'Cavalcade received terminate signal (%s), shutting down %d worker(s)...' . PHP_EOL, $signal, count( $this->workers ) );
// Wait and clean up

// Shutdown timeouts, measured in seconds from the moment the terminate
// signal was received ($received_at). A value of false disables that step.
$graceful = $this->options['graceful_shutdown_timeout'];
$did_graceful = false;
$force = $this->options['force_shutdown_timeout'];
while ( ! empty( $this->workers ) ) {
	$this->check_workers();

	$now = microtime( true );

	// If we've reached the graceful timeout, pass on the SIGTERM.
	// This will kill any workers that aren't intentionally capturing
	// SIGTERMs (eg any non-Cavalcade jobs in WP)
	//
	// Only do this once: without the $did_graceful guard, every poll of
	// this loop after the timeout would re-send SIGTERM to all remaining
	// workers and print the message again.
	if ( ! $did_graceful && $graceful !== false && $now >= ( $received_at + $graceful ) ) {
		printf( 'Graceful shutdown timeout reached, sending SIGTERM to %d worker(s)...' . PHP_EOL, count( $this->workers ) );
		foreach ( $this->workers as $worker ) {
			$worker->sigterm();
		}
		$did_graceful = true;
	}

	// If we've reached the force timeout, we need to kill the workers.
	if ( $force !== false && $now >= ( $received_at + $force ) ) {
		printf( 'Force shutdown timeout reached, sending SIGKILL to %d worker(s)...' . PHP_EOL, count( $this->workers ) );
		foreach ( $this->workers as $worker ) {
			$worker->sigkill();
		}

		// Perform final check, then break.
		$this->check_workers();
		break;
	}

	// Poll every 100ms.
	usleep( 100000 );
}

Expand Down
20 changes: 20 additions & 0 deletions inc/class-worker.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,26 @@ public function drain_pipes() {
}
}

/**
 * Deliver a SIGTERM to this worker's process.
 *
 * Runner::terminate() calls this once the graceful shutdown timeout has
 * elapsed, asking the worker to shut down of its own accord. With the
 * Runner's default options, workers then have 60s before being force-killed.
 */
public function sigterm() {
	$process = $this->process;
	proc_terminate( $process, SIGTERM );
}

/**
 * Send a SIGKILL to the process.
 *
 * This is used by Runner::terminate() to force a shutdown once the
 * force_shutdown_timeout has been reached. SIGKILL cannot be caught or
 * ignored, so the worker gets no opportunity to shut down gracefully.
 */
public function sigkill() {
	proc_terminate( $this->process, SIGKILL );
}

/**
* Shut down the process
*
Expand Down
11 changes: 10 additions & 1 deletion systemd.service
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,20 @@ Description=CavalcadeRunner

[Service]
TimeoutStartSec=0
TimeoutStopSec=600
Restart=always
WorkingDirectory=/srv/www/webroot
User=www-data
ExecStart=/etc/cavalcade/bin/cavalcade

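# When stopping Cavalcade (eg system shutdown or service stop), send the
# SIGTERM only to the runner so that it can propagate the signal to its
# workers itself. (If any processes remain after TimeoutStopSec, systemd
# sends SIGKILL to the whole control group for cleanup.)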
KillMode=mixed

# Wait 10min before sending SIGKILL when stopping. This should be set to a
# value higher than the force_shutdown_timeout option to ensure the Runner can
# gracefully stop children.
TimeoutStopSec=600

[Install]
WantedBy=multi-user.target