Skip to content

Commit

Permalink
Merge pull request #587 from elfenpiff/iox2-458-corrupted-services-cl…
Browse files Browse the repository at this point in the history
…eanup

[#458] corrupted services cleanup
  • Loading branch information
elfenpiff authored Jan 13, 2025
2 parents 6cba514 + f912afc commit 2eb752b
Show file tree
Hide file tree
Showing 9 changed files with 106 additions and 20 deletions.
2 changes: 2 additions & 0 deletions doc/release-notes/iceoryx2-unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
conflicts when merging.
-->

* Corrupted services are removed when they are part of a dead node
[#458](https://github.com/eclipse-iceoryx/iceoryx2/issues/458)
* Completion queue capacity exceeded when history > buffer
[#571](https://github.com/eclipse-iceoryx/iceoryx2/issues/571)
* Increase max supported shared memory size in Windows that restricts
Expand Down
2 changes: 2 additions & 0 deletions iceoryx2-ffi/cxx/include/iox2/enum_translation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1519,6 +1519,8 @@ constexpr auto from<int, iox2::NodeCleanupFailure>(const int value) noexcept ->
return iox2::NodeCleanupFailure::InternalError;
case iox2_node_cleanup_failure_e_INSUFFICIENT_PERMISSIONS:
return iox2::NodeCleanupFailure::InsufficientPermissions;
case iox2_node_cleanup_failure_e_VERSION_MISMATCH:
return iox2::NodeCleanupFailure::VersionMismatch;
}

IOX_UNREACHABLE();
Expand Down
2 changes: 2 additions & 0 deletions iceoryx2-ffi/cxx/include/iox2/node_failure_enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ enum class NodeCleanupFailure : uint8_t {
/// The stale resources of a dead [`Node`] could not be removed since the process does not have sufficient
/// permissions.
InsufficientPermissions,
/// Trying to cleanup resources from a [`Node`] node which was using a different iceoryx2 version.
VersionMismatch,
};

} // namespace iox2
Expand Down
3 changes: 3 additions & 0 deletions iceoryx2-ffi/ffi/src/api/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ pub enum iox2_node_cleanup_failure_e {
INTERNAL_ERROR,
/// The stale resources of a dead node could not be removed since the process does not have sufficient permissions.
INSUFFICIENT_PERMISSIONS,
/// Trying to cleanup resources from a dead node which was using a different iceoryx2 version.
VERSION_MISMATCH,
}

impl IntoCInt for NodeCleanupFailure {
Expand All @@ -100,6 +102,7 @@ impl IntoCInt for NodeCleanupFailure {
NodeCleanupFailure::InsufficientPermissions => {
iox2_node_cleanup_failure_e::INSUFFICIENT_PERMISSIONS
}
NodeCleanupFailure::VersionMismatch => iox2_node_cleanup_failure_e::VERSION_MISMATCH,
}) as c_int
}
}
Expand Down
60 changes: 56 additions & 4 deletions iceoryx2/src/node/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,9 @@ use crate::service::config_scheme::{
};
use crate::service::service_id::ServiceId;
use crate::service::service_name::ServiceName;
use crate::service::{self, remove_service_tag};
use crate::service::{
self, remove_service_tag, remove_static_service_config, ServiceRemoveNodeError,
};
use crate::signal_handling_mode::SignalHandlingMode;
use crate::{config::Config, service::config_scheme::node_details_config};
use core::cell::UnsafeCell;
Expand Down Expand Up @@ -271,6 +273,8 @@ pub enum NodeCleanupFailure {
InternalError,
/// The stale resources of a dead [`Node`] could not be removed since the process does not have sufficient permissions.
InsufficientPermissions,
/// Trying to cleanup resources from a dead [`Node`] which was using a different iceoryx2 version.
VersionMismatch,
}

impl core::fmt::Display for NodeCleanupFailure {
Expand Down Expand Up @@ -523,12 +527,58 @@ impl<Service: service::Service> DeadNodeView<Service> {
}
let cleaner = cleaner.unwrap();

let mut cleanup_failure = Ok(());
let remove_node_from_service = |service_id: &ServiceId| {
if Service::__internal_remove_node_from_service(self.id(), service_id, config).is_ok() {
if let Err(e) = remove_service_tag::<Service>(self.id(), service_id, config) {
match Service::__internal_remove_node_from_service(self.id(), service_id, config) {
Ok(()) => {
if let Err(e) = remove_service_tag::<Service>(self.id(), service_id, config) {
debug!(from self,
"The service tag could not be removed from the dead node ({:?}).",
e);
}
}
Err(ServiceRemoveNodeError::VersionMismatch) => {
cleanup_failure = Err(NodeCleanupFailure::VersionMismatch);
debug!(from self,
"The service tag coult not be removed from the dead node ({:?}).",
"{msg} since the dead node was using a different iceoryx2 version.");
}
Err(ServiceRemoveNodeError::ServiceInCorruptedState) => {
debug!(from self,
"{msg} since the service itself is corrupted. Trying to remove the corrupted remainders of the service.");
match unsafe {
remove_static_service_config::<Service>(config, &service_id.0.into())
} {
Ok(v) => {
if let Err(e) =
remove_service_tag::<Service>(self.id(), service_id, config)
{
debug!(from self,
"The service tag could not be removed from the dead node ({:?}).",
e);
}

if v {
debug!(from self, "Successfully removed corrupted static service config.");
} else {
debug!(from self, "Corrupted static service config no longer exists, another instance might have cleaned it up.");
}
}
Err(NamedConceptRemoveError::InsufficientPermissions) => {
cleanup_failure = Err(NodeCleanupFailure::InsufficientPermissions);
debug!(from self,
"{msg} since the corrupted service remainders to could not be removed due to insufficient permissions.");
}
Err(e) => {
cleanup_failure = Err(NodeCleanupFailure::InternalError);
debug!(from self,
"{msg} since the corrupted service remainders to could not be removed due to an internal error ({:?}).", e);
}
}
}
Err(e) => {
cleanup_failure = Err(NodeCleanupFailure::InternalError);
debug!(from self,
"{msg} due to an internal error while removing the node from the service ({:?}).", e);
}
}
CallbackProgression::Continue
Expand All @@ -544,6 +594,8 @@ impl<Service: service::Service> DeadNodeView<Service> {
}
};

cleanup_failure?;

match remove_node::<Service>(*self.id(), config) {
Ok(_) => {
drop(cleaner);
Expand Down
6 changes: 6 additions & 0 deletions iceoryx2/src/service/builder/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,12 @@ impl<ServiceType: service::Service> Builder<ServiceType> {
fail!(from self, with EventOpenError::ExceedsMaxNumberOfNodes,
"{} since it would exceed the maximum number of supported nodes.", msg);
}
Err(OpenDynamicStorageFailure::DynamicStorageOpenError(
DynamicStorageOpenError::DoesNotExist,
)) => {
fail!(from self, with EventOpenError::ServiceInCorruptedState,
"{} since the dynamic segment of the service is missing.", msg);
}
Err(e) => {
if self.base.is_service_available(msg)?.is_none() {
fail!(from self, with EventOpenError::DoesNotExist,
Expand Down
6 changes: 6 additions & 0 deletions iceoryx2/src/service/builder/publish_subscribe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,12 @@ impl<Payload: Debug + ?Sized, UserHeader: Debug, ServiceType: service::Service>
fail!(from self, with PublishSubscribeOpenError::ExceedsMaxNumberOfNodes,
"{} since it would exceed the maximum number of supported nodes.", msg);
}
Err(OpenDynamicStorageFailure::DynamicStorageOpenError(
DynamicStorageOpenError::DoesNotExist,
)) => {
fail!(from self, with PublishSubscribeOpenError::ServiceInCorruptedState,
"{} since the dynamic segment of the service is missing.", msg);
}
Err(e) => {
if self.is_service_available(msg)?.is_none() {
fail!(from self, with PublishSubscribeOpenError::DoesNotExist,
Expand Down
31 changes: 23 additions & 8 deletions iceoryx2/src/service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ use self::service_name::ServiceName;
pub(crate) enum ServiceRemoveNodeError {
VersionMismatch,
InternalError,
ServiceInCorruptedState,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
Expand Down Expand Up @@ -335,7 +336,6 @@ impl<S: Service> Drop for ServiceState<S> {

pub(crate) mod internal {
use builder::event::EventOpenError;
use config_scheme::static_config_storage_config;
use dynamic_config::{PortCleanupAction, RemoveDeadNodeResult};
use port_factory::PortFactory;

Expand Down Expand Up @@ -445,7 +445,11 @@ pub(crate) mod internal {

let dynamic_config = match open_dynamic_config::<S>(config, service_id) {
Ok(Some(c)) => c,
Ok(None) => return Ok(()),
Ok(None) => {
fail!(from origin,
with ServiceRemoveNodeError::ServiceInCorruptedState,
"{} since the dynamic service segment is missing - service seems to be in a corrupted state.", msg);
}
Err(ServiceDetailsError::VersionMismatch) => {
fail!(from origin, with ServiceRemoveNodeError::VersionMismatch,
"{} since the service version does not match.", msg);
Expand Down Expand Up @@ -509,12 +513,7 @@ pub(crate) mod internal {
};

if remove_service {
match unsafe {
<S::StaticStorage as NamedConceptMgmt>::remove_cfg(
&service_id.0.into(),
&static_config_storage_config::<S>(config),
)
} {
match unsafe { remove_static_service_config::<S>(config, &service_id.0.into()) } {
Ok(_) => {
debug!(from origin, "Remove unused service.");
dynamic_config.acquire_ownership()
Expand Down Expand Up @@ -670,6 +669,22 @@ pub trait Service: Debug + Sized + internal::ServiceInternal<Self> {
}
}

pub(crate) unsafe fn remove_static_service_config<S: Service>(
config: &config::Config,
uuid: &FileName,
) -> Result<bool, NamedConceptRemoveError> {
let msg = "Unable to remove static service config";
let origin = "Service::remove_static_service_config()";
let static_storage_config = config_scheme::static_config_storage_config::<S>(config);

match <S::StaticStorage as NamedConceptMgmt>::remove_cfg(uuid, &static_storage_config) {
Ok(v) => Ok(v),
Err(e) => {
fail!(from origin, with e, "{msg} due to ({:?}).", e);
}
}
}

fn details<S: Service>(
config: &config::Config,
uuid: &FileName,
Expand Down
14 changes: 6 additions & 8 deletions iceoryx2/src/service/naming_scheme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@ use iceoryx2_bb_system_types::file_name::FileName;
pub(crate) fn event_concept_name(listener_id: &UniqueListenerId) -> FileName {
let msg = "The system does not support the required file name length for the listeners event concept name.";
let origin = "event_concept_name()";
let mut file = fatal_panic!(from origin, when FileName::new(listener_id.0.pid().to_string().as_bytes()), "{}", msg);
fatal_panic!(from origin, when file.push(b'_'), "{}", msg);
fatal_panic!(from origin, when file.push_bytes(listener_id.0.value().to_string().as_bytes()), "{}", msg);
file
fatal_panic!(from origin,
when FileName::new(listener_id.0.value().to_string().as_bytes()),
"{}", msg)
}

pub(crate) fn connection_name(
Expand Down Expand Up @@ -55,8 +54,7 @@ pub(crate) fn data_segment_name(publisher_id: &UniquePublisherId) -> FileName {
let msg = "The system does not support the required file name length for the publishers data segment.";
let origin = "data_segment_name()";

let mut file = fatal_panic!(from origin, when FileName::new(publisher_id.0.pid().to_string().as_bytes()), "{}", msg);
fatal_panic!(from origin, when file.push(b'_'), "{}", msg);
fatal_panic!(from origin, when file.push_bytes(publisher_id.0.value().to_string().as_bytes()), "{}", msg);
file
fatal_panic!(from origin,
when FileName::new(publisher_id.0.value().to_string().as_bytes()),
"{}", msg)
}

0 comments on commit 2eb752b

Please sign in to comment.