Skip to content

Commit

Permalink
SHM cleanup workaround (#1411)
Browse files Browse the repository at this point in the history
* SHM cleanup workaround

* fix CI

* fix SHM tests

* fix CI

* fix CI

* use proper tokio runtime for spawning cleanup tasks

* get rid of signal_hook_tokio as it doesn't support win

* - remove unnecessary dep
- fix windows compilation

* - There is no SIGHUP mapping on win

* [skip ci] doc comment update

Co-authored-by: Luca Cominardi <[email protected]>

* [skip ci] doc comment update

Co-authored-by: Luca Cominardi <[email protected]>

* Review fixes

---------

Co-authored-by: Luca Cominardi <[email protected]>
  • Loading branch information
yellowhatter and Mallets authored Sep 17, 2024
1 parent 58a9ee2 commit 9746429
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 17 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ stabby = "36.1.1"
sha3 = "0.10.8"
shared_memory = "0.12.4"
shellexpand = "3.1.0"
signal-hook = { version = "0.3.17", default-features = false }
socket2 = { version = "0.5.7", features = ["all"] }
stop-token = "0.7.0"
syn = "2.0"
Expand Down
1 change: 1 addition & 0 deletions commons/zenoh-shm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ num_cpus = { workspace = true, optional = true }
thread-priority = { workspace = true }
lockfree = { workspace = true }
stabby = { workspace = true }
signal-hook = { workspace = true }

[dev-dependencies]
libc = { workspace = true }
35 changes: 35 additions & 0 deletions commons/zenoh-shm/src/api/cleanup.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//
// Copyright (c) 2024 ZettaScale Technology
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License 2.0 which is available at
// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
//
// Contributors:
// ZettaScale Zenoh Team, <[email protected]>
//

use crate::cleanup::CLEANUP;

/// Make forced cleanup
/// NOTE: this is a part of a temporary on-exit-cleanup workaround and it will be very likely removed in the future.
/// WARN: The improper usage can break the application logic, impacting SHM-utilizing Sessions in other processes.
/// Cleanup unlinks SHM segments _created_ by current process from filesystem with the following consequences:
/// - Sessions that are not linked to this segment will fail to link it if they try. Such kind of errors are properly handled.
/// - Already linked processes will still have this shared memory mapped and safely accessible
/// - The actual memory will be reclaimed by the OS only after last process using it will close it or exit
///
/// In order to properly cleanup some SHM internals upon process exit, Zenoh installs exit handlers (see atexit() API).
/// The atexit handler is executed only on process exit(), the inconvenience is that terminating signal handlers
/// (like SIGINT) bypass it and terminate the process without cleanup. To eliminate this effect, Zenoh overrides
/// SIGHUP, SIGTERM, SIGINT and SIGQUIT handlers and calls exit() inside to make graceful shutdown. If user is going to
/// override these Zenoh's handlers, the workaround will break, and there are two ways to keep this workaround working:
/// - execute overridden Zenoh handlers in overriding handler code
/// - call force_cleanup_before_exit() anywhere at any time before terminating the process
#[zenoh_macros::unstable_doc]
pub fn force_cleanup_before_exit() {
CLEANUP.read().cleanup();
}
1 change: 1 addition & 0 deletions commons/zenoh-shm/src/api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
//

pub mod buffer;
pub mod cleanup;
pub mod client;
pub mod client_storage;
pub mod common;
Expand Down
32 changes: 27 additions & 5 deletions commons/zenoh-shm/src/cleanup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// ZettaScale Zenoh Team, <[email protected]>
//

use signal_hook::consts::signal::*;
use static_init::dynamic;

/// A global cleanup, that is guaranteed to be dropped at normal program exit and that will
Expand All @@ -26,22 +27,43 @@ pub(crate) struct Cleanup {

impl Cleanup {
fn new() -> Self {
// todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signal handlers
// that execute std::terminate instead of exit
for signal in [
#[cfg(not(target_os = "windows"))]
SIGHUP,
SIGTERM,
SIGINT,
#[cfg(not(target_os = "windows"))]
SIGQUIT,
] {
unsafe {
let _ = signal_hook::low_level::register(signal, || {
std::process::exit(0);
});
}
}

Self {
cleanups: Default::default(),
}
}

pub(crate) fn cleanup(&self) {
while let Some(cleanup) = self.cleanups.pop() {
if let Some(f) = cleanup {
f();
}
}
}

pub(crate) fn register_cleanup(&self, cleanup_fn: Box<dyn FnOnce() + Send>) {
self.cleanups.push(Some(cleanup_fn));
}
}

impl Drop for Cleanup {
fn drop(&mut self) {
while let Some(cleanup) = self.cleanups.pop() {
if let Some(f) = cleanup {
f();
}
}
self.cleanup();
}
}
24 changes: 12 additions & 12 deletions commons/zenoh-shm/tests/watchdog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,23 @@ fn watchdog_confirmed_fn() -> impl Fn(usize, usize) -> ZResult<()> + Clone + Sen
}
}

#[test]
#[ignore]
#[test]
fn watchdog_confirmed() {
execute_concurrent(1, 10, watchdog_confirmed_fn());
}

#[test]
#[ignore]
#[test]
fn watchdog_confirmed_concurrent() {
execute_concurrent(1000, 10, watchdog_confirmed_fn());
}

// TODO: confirmation to dangling watchdog actually writes to potentially-existing
// other watchdog instance from other test running in the same process and changes it's behaviour,
// so we cannot run dangling test in parallel with anything else
#[test]
#[ignore]
#[test]
fn watchdog_confirmed_dangling() {
let allocated = GLOBAL_STORAGE
.read()
Expand Down Expand Up @@ -136,14 +136,14 @@ fn watchdog_validated_fn() -> impl Fn(usize, usize) -> ZResult<()> + Clone + Sen
}
}

#[test]
#[ignore]
#[test]
fn watchdog_validated() {
execute_concurrent(1, 10, watchdog_validated_fn());
}

#[test]
#[ignore]
#[test]
fn watchdog_validated_concurrent() {
execute_concurrent(1000, 10, watchdog_validated_fn());
}
Expand Down Expand Up @@ -176,14 +176,14 @@ fn watchdog_validated_invalid_without_confirmator_fn(
}
}

#[test]
#[ignore]
#[test]
fn watchdog_validated_invalid_without_confirmator() {
execute_concurrent(1, 10, watchdog_validated_invalid_without_confirmator_fn());
}

#[test]
#[ignore]
#[test]
fn watchdog_validated_invalid_without_confirmator_concurrent() {
execute_concurrent(
1000,
Expand Down Expand Up @@ -241,14 +241,14 @@ fn watchdog_validated_additional_confirmation_fn(
}
}

#[test]
#[ignore]
#[test]
fn watchdog_validated_additional_confirmation() {
execute_concurrent(1, 10, watchdog_validated_additional_confirmation_fn());
}

#[test]
#[ignore]
#[test]
fn watchdog_validated_additional_confirmation_concurrent() {
execute_concurrent(1000, 10, watchdog_validated_additional_confirmation_fn());
}
Expand Down Expand Up @@ -296,22 +296,22 @@ fn watchdog_validated_overloaded_system_fn(
}
}

#[test]
#[ignore]
#[test]
fn watchdog_validated_low_load() {
let _load = CpuLoad::low();
execute_concurrent(1000, 10, watchdog_validated_overloaded_system_fn());
}

#[test]
#[ignore]
#[test]
fn watchdog_validated_high_load() {
let _load = CpuLoad::optimal_high();
execute_concurrent(1000, 10, watchdog_validated_overloaded_system_fn());
}

#[test]
#[ignore]
#[test]
fn watchdog_validated_overloaded_system() {
let _load = CpuLoad::excessive();
execute_concurrent(1000, 10, watchdog_validated_overloaded_system_fn());
Expand Down

0 comments on commit 9746429

Please sign in to comment.