diff --git a/crates/libcgroups/src/common.rs b/crates/libcgroups/src/common.rs index 76bcc14f0..695af77b9 100644 --- a/crates/libcgroups/src/common.rs +++ b/crates/libcgroups/src/common.rs @@ -11,6 +11,7 @@ use oci_spec::runtime::LinuxResources; use oci_spec::runtime::{ LinuxDevice, LinuxDeviceBuilder, LinuxDeviceCgroup, LinuxDeviceCgroupBuilder, LinuxDeviceType, }; +use serde::{Deserialize, Serialize}; use super::stats::Stats; use super::{systemd, v1, v2}; @@ -326,7 +327,7 @@ pub enum CreateCgroupSetupError { Systemd(#[from] systemd::manager::SystemdManagerError), } -#[derive(Clone)] +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] pub struct CgroupConfig { pub cgroup_path: PathBuf, pub systemd_cgroup: bool, diff --git a/crates/libcontainer/src/config.rs b/crates/libcontainer/src/config.rs index 37f1508f0..5b3d3d992 100644 --- a/crates/libcontainer/src/config.rs +++ b/crates/libcontainer/src/config.rs @@ -5,8 +5,6 @@ use std::path::{Path, PathBuf}; use oci_spec::runtime::{Hooks, Spec}; use serde::{Deserialize, Serialize}; -use crate::utils; - #[derive(Debug, thiserror::Error)] pub enum ConfigError { #[error("failed to save config")] @@ -43,20 +41,17 @@ const YOUKI_CONFIG_NAME: &str = "youki_config.json"; #[non_exhaustive] pub struct YoukiConfig { pub hooks: Option, - pub cgroup_path: PathBuf, + pub cgroup_config: Option, } impl<'a> YoukiConfig { - pub fn from_spec(spec: &'a Spec, container_id: &str) -> Result { + pub fn from_spec( + spec: &'a Spec, + cgroup_config: Option, + ) -> Result { Ok(YoukiConfig { hooks: spec.hooks().clone(), - cgroup_path: utils::get_cgroup_path( - spec.linux() - .as_ref() - .ok_or(ConfigError::MissingLinux)? - .cgroups_path(), - container_id, - ), + cgroup_config, }) } @@ -106,13 +101,15 @@ mod tests { fn test_config_from_spec() -> Result<()> { let container_id = "sample"; let spec = Spec::default(); - let config = YoukiConfig::from_spec(&spec, container_id)?; + let cgroup_config = libcgroups::common::CgroupConfig { + cgroup_path: PathBuf::from(format!(":youki:{container_id}")), + systemd_cgroup: true, + container_name: container_id.to_owned(), + }; + let config = YoukiConfig::from_spec(&spec, Some(cgroup_config.clone()))?; assert_eq!(&config.hooks, spec.hooks()); - dbg!(&config.cgroup_path); - assert_eq!( - config.cgroup_path, - PathBuf::from(format!(":youki:{container_id}")) - ); + dbg!(&config.cgroup_config); + assert_eq!(config.cgroup_config, Some(cgroup_config)); Ok(()) } @@ -121,7 +118,12 @@ mod tests { let container_id = "sample"; let tmp = tempfile::tempdir().expect("create temp dir"); let spec = Spec::default(); - let config = YoukiConfig::from_spec(&spec, container_id)?; + let cgroup_config = libcgroups::common::CgroupConfig { + cgroup_path: PathBuf::from(format!(":youki:{container_id}")), + systemd_cgroup: true, + container_name: container_id.to_owned(), + }; + let config = YoukiConfig::from_spec(&spec, Some(cgroup_config))?; config.save(&tmp)?; let act = YoukiConfig::load(&tmp)?; assert_eq!(act, config); diff --git a/crates/libcontainer/src/container/builder_impl.rs b/crates/libcontainer/src/container/builder_impl.rs index 9c185978e..8e0a150cf 100644 --- a/crates/libcontainer/src/container/builder_impl.rs +++ b/crates/libcontainer/src/container/builder_impl.rs @@ -10,6 +10,7 @@ use oci_spec::runtime::Spec; use super::{Container, ContainerStatus}; use crate::error::{CreateContainerError, LibcontainerError, MissingSpecError}; +use crate::hooks; use crate::notify_socket::NotifyListener; use crate::process::args::{ContainerArgs, ContainerType}; use crate::process::intel_rdt::delete_resctrl_subdirectory; @@ -17,17 +18,14 @@ use crate::process::{self}; use crate::syscall::syscall::SyscallType; use crate::user_ns::UserNamespaceConfig; use crate::workload::Executor; -use crate::{hooks, utils}; pub(super) struct ContainerBuilderImpl { /// Flag indicating if an init or a tenant container should be created pub container_type: ContainerType, /// Interface to operating system primitives pub syscall: SyscallType, - /// Flag indicating if systemd should be used for cgroup management - pub use_systemd: bool, - /// Id of the container - pub container_id: String, + /// Interface to operating system primitives + pub cgroup_config: Option, /// OCI compliant runtime spec pub spec: Rc, /// Root filesystem of the container @@ -41,8 +39,6 @@ pub(super) struct ContainerBuilderImpl { pub user_ns_config: Option, /// Path to the Unix Domain Socket to communicate container start pub notify_path: PathBuf, - /// Container state - pub container: Option, /// File descriptos preserved/passed to the container init process. pub preserve_fds: i32, /// If the container is to be run in detached mode @@ -66,42 +62,28 @@ impl ContainerBuilderImpl { Err(outer) => { // Only the init container should be cleaned up in the case of // an error. - let cleanup_err = if self.is_init_container() { - self.cleanup_container().err() - } else { - None - }; + let cleanup_err = + if let ContainerType::InitContainer { container } = &self.container_type { + cleanup_container(self.cgroup_config.clone(), container).err() + } else { + None + }; Err(CreateContainerError::new(outer, cleanup_err).into()) } } } - fn is_init_container(&self) -> bool { - matches!(self.container_type, ContainerType::InitContainer) - } - fn run_container(&mut self) -> Result { - let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; - let cgroups_path = utils::get_cgroup_path(linux.cgroups_path(), &self.container_id); - let cgroup_config = libcgroups::common::CgroupConfig { - cgroup_path: cgroups_path, - systemd_cgroup: self.use_systemd || self.user_ns_config.is_some(), - container_name: self.container_id.to_owned(), - }; let process = self .spec .process() .as_ref() .ok_or(MissingSpecError::Process)?; - if matches!(self.container_type, ContainerType::InitContainer) { + if let ContainerType::InitContainer { container } = &self.container_type { if let Some(hooks) = self.spec.hooks() { - hooks::run_hooks( - hooks.create_runtime().as_ref(), - self.container.as_ref(), - None, - )? + hooks::run_hooks(hooks.create_runtime().as_ref(), container, None)? } } @@ -143,6 +125,7 @@ impl ContainerBuilderImpl { // going to be switching to a different security context. Thus setting // ourselves to be non-dumpable only breaks things (like rootless // containers), which is the recommendation from the kernel folks. + let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; if linux.namespaces().is_some() { prctl::set_dumpable(false).map_err(|e| { LibcontainerError::Other(format!( @@ -156,16 +139,15 @@ impl ContainerBuilderImpl { // therefore we will have to move all the variable by value. Since self // is a shared reference, we have to clone these variables here. let container_args = ContainerArgs { - container_type: self.container_type, + container_type: self.container_type.clone(), syscall: self.syscall, spec: Rc::clone(&self.spec), rootfs: self.rootfs.to_owned(), console_socket: self.console_socket.as_ref().map(|c| c.as_raw_fd()), notify_listener, preserve_fds: self.preserve_fds, - container: self.container.to_owned(), user_ns_config: self.user_ns_config.to_owned(), - cgroup_config, + cgroup_config: self.cgroup_config.clone(), detached: self.detached, executor: self.executor.clone(), no_pivot: self.no_pivot, @@ -190,7 +172,7 @@ impl ContainerBuilderImpl { })?; } - if let Some(container) = &mut self.container { + if let ContainerType::InitContainer { container } = &mut self.container_type { // update status and pid of the container process container .set_status(ContainerStatus::Created) @@ -202,47 +184,42 @@ impl ContainerBuilderImpl { Ok(init_pid) } +} - fn cleanup_container(&self) -> Result<(), LibcontainerError> { - let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; - let cgroups_path = utils::get_cgroup_path(linux.cgroups_path(), &self.container_id); - let cmanager = - libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig { - cgroup_path: cgroups_path, - systemd_cgroup: self.use_systemd || self.user_ns_config.is_some(), - container_name: self.container_id.to_string(), - })?; - - let mut errors = Vec::new(); +fn cleanup_container( + cgroup_config: Option, + container: &Container, +) -> Result<(), LibcontainerError> { + let mut errors = Vec::new(); + if let Some(cc) = cgroup_config { + let cmanager = libcgroups::common::create_cgroup_manager(cc)?; if let Err(e) = cmanager.remove() { tracing::error!(error = ?e, "failed to remove cgroup manager"); errors.push(e.to_string()); } + } - if let Some(container) = &self.container { - if let Some(true) = container.clean_up_intel_rdt_subdirectory() { - if let Err(e) = delete_resctrl_subdirectory(container.id()) { - tracing::error!(id = ?container.id(), error = ?e, "failed to delete resctrl subdirectory"); - errors.push(e.to_string()); - } - } - - if container.root.exists() { - if let Err(e) = fs::remove_dir_all(&container.root) { - tracing::error!(container_root = ?container.root, error = ?e, "failed to delete container root"); - errors.push(e.to_string()); - } - } + if let Some(true) = container.clean_up_intel_rdt_subdirectory() { + if let Err(e) = delete_resctrl_subdirectory(container.id()) { + tracing::error!(id = ?container.id(), error = ?e, "failed to delete resctrl subdirectory"); + errors.push(e.to_string()); } + } - if !errors.is_empty() { - return Err(LibcontainerError::Other(format!( - "failed to cleanup container: {}", - errors.join(";") - ))); + if container.root.exists() { + if let Err(e) = fs::remove_dir_all(&container.root) { + tracing::error!(container_root = ?container.root, error = ?e, "failed to delete container root"); + errors.push(e.to_string()); } + } - Ok(()) + if !errors.is_empty() { + return Err(LibcontainerError::Other(format!( + "failed to cleanup container: {}", + errors.join(";") + ))); } + + Ok(()) } diff --git a/crates/libcontainer/src/container/container.rs b/crates/libcontainer/src/container/container.rs index 7a05c3495..fbe3acf5a 100644 --- a/crates/libcontainer/src/container/container.rs +++ b/crates/libcontainer/src/container/container.rs @@ -121,15 +121,6 @@ impl Container { self } - pub fn systemd(&self) -> bool { - self.state.use_systemd - } - - pub fn set_systemd(&mut self, should_use: bool) -> &mut Self { - self.state.use_systemd = should_use; - self - } - pub fn set_clean_up_intel_rdt_directory(&mut self, clean_up: bool) -> &mut Self { self.state.clean_up_intel_rdt_subdirectory = Some(clean_up); self @@ -282,16 +273,6 @@ mod tests { assert_eq!(container.state.annotations, Some(annotations)); } - #[test] - fn test_get_set_systemd() { - let mut container = Container::default(); - assert!(!container.systemd()); - container.set_systemd(true); - assert!(container.systemd()); - container.set_systemd(false); - assert!(!container.systemd()); - } - #[test] fn test_get_set_creator() { let mut container = Container::default(); @@ -331,7 +312,7 @@ mod tests { let tmp_dir = tempfile::tempdir().unwrap(); use oci_spec::runtime::Spec; let spec = Spec::default(); - let config = YoukiConfig::from_spec(&spec, "123").context("convert spec to config")?; + let config = YoukiConfig::from_spec(&spec, None).context("convert spec to config")?; config.save(tmp_dir.path()).context("save config")?; let container = Container { diff --git a/crates/libcontainer/src/container/container_delete.rs b/crates/libcontainer/src/container/container_delete.rs index d246658c3..ece0ed189 100644 --- a/crates/libcontainer/src/container/container_delete.rs +++ b/crates/libcontainer/src/container/container_delete.rs @@ -5,7 +5,6 @@ use libcgroups::{self}; use nix::sys::signal; use super::{Container, ContainerStatus}; -use crate::config::YoukiConfig; use crate::error::LibcontainerError; use crate::hooks; use crate::process::intel_rdt::delete_resctrl_subdirectory; @@ -78,32 +77,26 @@ impl Container { } if self.root.exists() { - match YoukiConfig::load(&self.root) { + match self.spec() { Ok(config) => { tracing::debug!("config: {:?}", config); // remove the cgroup created for the container // check https://man7.org/linux/man-pages/man7/cgroups.7.html // creating and removing cgroups section for more information on cgroups - let cmanager = libcgroups::common::create_cgroup_manager( - libcgroups::common::CgroupConfig { - cgroup_path: config.cgroup_path.to_owned(), - systemd_cgroup: self.systemd(), - container_name: self.id().to_string(), - }, - )?; - cmanager.remove().map_err(|err| { - tracing::error!(cgroup_path = ?config.cgroup_path, "failed to remove cgroup due to: {err:?}"); - err - })?; + if let Some(cc) = config.cgroup_config { + let cmanager = libcgroups::common::create_cgroup_manager(cc.clone())?; + cmanager.remove().map_err(|err| { + tracing::error!(cgroup_config = ?cc, "failed to remove cgroup due to: {err:?}"); + err + })?; + } if let Some(hooks) = config.hooks.as_ref() { - hooks::run_hooks(hooks.poststop().as_ref(), Some(self), None).map_err( - |err| { - tracing::error!(err = ?err, "failed to run post stop hooks"); - err - }, - )?; + hooks::run_hooks(hooks.poststop().as_ref(), self, None).map_err(|err| { + tracing::error!(err = ?err, "failed to run post stop hooks"); + err + })?; } } Err(err) => { diff --git a/crates/libcontainer/src/container/container_events.rs b/crates/libcontainer/src/container/container_events.rs index 83df282f4..9a4f39f62 100644 --- a/crates/libcontainer/src/container/container_events.rs +++ b/crates/libcontainer/src/container/container_events.rs @@ -34,12 +34,12 @@ impl Container { return Err(LibcontainerError::IncorrectStatus); } - let cgroup_manager = - libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig { - cgroup_path: self.spec()?.cgroup_path, - systemd_cgroup: self.systemd(), - container_name: self.id().to_string(), - })?; + let cgroup_config = match self.spec()?.cgroup_config { + Some(cc) => cc, + None => return Err(LibcontainerError::CgroupsMissing), + }; + + let cgroup_manager = libcgroups::common::create_cgroup_manager(cgroup_config)?; match stats { true => { let stats = cgroup_manager.stats()?; diff --git a/crates/libcontainer/src/container/container_kill.rs b/crates/libcontainer/src/container/container_kill.rs index fc5da3db9..d4ebe95fe 100644 --- a/crates/libcontainer/src/container/container_kill.rs +++ b/crates/libcontainer/src/container/container_kill.rs @@ -79,32 +79,30 @@ impl Container { // For cgroup V1, a frozon process cannot respond to signals, // so we need to thaw it. Only thaw the cgroup for SIGKILL. if self.status() == ContainerStatus::Paused && signal == signal::Signal::SIGKILL { - match get_cgroup_setup()? { - libcgroups::common::CgroupSetup::Legacy - | libcgroups::common::CgroupSetup::Hybrid => { - let cmanager = libcgroups::common::create_cgroup_manager( - libcgroups::common::CgroupConfig { - cgroup_path: self.spec()?.cgroup_path, - systemd_cgroup: self.systemd(), - container_name: self.id().to_string(), - }, - )?; - cmanager.freeze(libcgroups::common::FreezerState::Thawed)?; + if let Some(cgroup_config) = self.spec()?.cgroup_config { + match get_cgroup_setup()? { + libcgroups::common::CgroupSetup::Legacy + | libcgroups::common::CgroupSetup::Hybrid => { + let cmanager = libcgroups::common::create_cgroup_manager(cgroup_config)?; + cmanager.freeze(libcgroups::common::FreezerState::Thawed)?; + } + libcgroups::common::CgroupSetup::Unified => {} } - libcgroups::common::CgroupSetup::Unified => {} + } else { + return Err(LibcontainerError::CgroupsMissing); } } Ok(()) } fn kill_all_processes>(&self, signal: S) -> Result<(), LibcontainerError> { + let cgroup_config = match self.spec()?.cgroup_config { + Some(cc) => cc, + None => return Err(LibcontainerError::CgroupsMissing), + }; + let signal = signal.into().into_raw(); - let cmanager = - libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig { - cgroup_path: self.spec()?.cgroup_path, - systemd_cgroup: self.systemd(), - container_name: self.id().to_string(), - })?; + let cmanager = libcgroups::common::create_cgroup_manager(cgroup_config)?; if let Err(e) = cmanager.freeze(libcgroups::common::FreezerState::Frozen) { tracing::warn!( diff --git a/crates/libcontainer/src/container/container_pause.rs b/crates/libcontainer/src/container/container_pause.rs index 7dcfa7793..aed466012 100644 --- a/crates/libcontainer/src/container/container_pause.rs +++ b/crates/libcontainer/src/container/container_pause.rs @@ -32,12 +32,12 @@ impl Container { return Err(LibcontainerError::IncorrectStatus); } - let cmanager = - libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig { - cgroup_path: self.spec()?.cgroup_path, - systemd_cgroup: self.systemd(), - container_name: self.id().to_string(), - })?; + let cgroup_config = match self.spec()?.cgroup_config { + Some(cc) => cc, + None => return Err(LibcontainerError::CgroupsMissing), + }; + + let cmanager = libcgroups::common::create_cgroup_manager(cgroup_config)?; cmanager.freeze(FreezerState::Frozen)?; tracing::debug!("saving paused status"); diff --git a/crates/libcontainer/src/container/container_resume.rs b/crates/libcontainer/src/container/container_resume.rs index 48fd65d84..dc6cc4ad3 100644 --- a/crates/libcontainer/src/container/container_resume.rs +++ b/crates/libcontainer/src/container/container_resume.rs @@ -33,12 +33,12 @@ impl Container { return Err(LibcontainerError::IncorrectStatus); } - let cmanager = - libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig { - cgroup_path: self.spec()?.cgroup_path, - systemd_cgroup: self.systemd(), - container_name: self.id().to_string(), - })?; + let cgroup_config = match self.spec()?.cgroup_config { + Some(cc) => cc, + None => return Err(LibcontainerError::CgroupsMissing), + }; + + let cmanager = libcgroups::common::create_cgroup_manager(cgroup_config)?; // resume the frozen container cmanager.freeze(FreezerState::Thawed)?; diff --git a/crates/libcontainer/src/container/container_start.rs b/crates/libcontainer/src/container/container_start.rs index 8ba182eed..fb800ac29 100644 --- a/crates/libcontainer/src/container/container_start.rs +++ b/crates/libcontainer/src/container/container_start.rs @@ -1,7 +1,6 @@ use nix::sys::signal; use super::{Container, ContainerStatus}; -use crate::config::YoukiConfig; use crate::error::LibcontainerError; use crate::hooks; use crate::notify_socket::{NotifySocket, NOTIFY_FILE}; @@ -35,7 +34,7 @@ impl Container { return Err(LibcontainerError::IncorrectStatus); } - let config = YoukiConfig::load(&self.root).map_err(|err| { + let config = self.spec().map_err(|err| { tracing::error!( "failed to load runtime spec for container {}: {}", self.id(), @@ -47,7 +46,7 @@ impl Container { // While prestart is marked as deprecated in the OCI spec, the docker and integration test still // uses it. #[allow(deprecated)] - hooks::run_hooks(hooks.prestart().as_ref(), Some(self), None).map_err(|err| { + hooks::run_hooks(hooks.prestart().as_ref(), self, None).map_err(|err| { tracing::error!("failed to run pre start hooks: {}", err); // In the case where prestart hook fails, the runtime must // stop the container before generating an error and exiting. @@ -69,7 +68,7 @@ impl Container { // Run post start hooks. It runs after the container process is started. // It is called in the runtime namespace. if let Some(hooks) = config.hooks.as_ref() { - hooks::run_hooks(hooks.poststart().as_ref(), Some(self), Some(&self.root)).map_err( + hooks::run_hooks(hooks.poststart().as_ref(), self, Some(&self.root)).map_err( |err| { tracing::error!("failed to run post start hooks: {}", err); err diff --git a/crates/libcontainer/src/container/init_builder.rs b/crates/libcontainer/src/container/init_builder.rs index 4ff2094ed..310ee92ba 100644 --- a/crates/libcontainer/src/container/init_builder.rs +++ b/crates/libcontainer/src/container/init_builder.rs @@ -18,6 +18,7 @@ use crate::{apparmor, tty, user_ns, utils}; pub struct InitContainerBuilder { base: ContainerBuilder, bundle: PathBuf, + use_cgroups: bool, use_systemd: bool, detached: bool, no_pivot: bool, @@ -30,12 +31,19 @@ impl InitContainerBuilder { Self { base: builder, bundle, + use_cgroups: true, use_systemd: true, detached: true, no_pivot: false, } } + /// Sets if cgroups should be used at all (overrides systemd if false) + pub fn with_cgroups(mut self, should_use: bool) -> Self { + self.use_cgroups = should_use; + self + } + /// Sets if systemd should be used for managing cgroups pub fn with_systemd(mut self, should_use: bool) -> Self { self.use_systemd = should_use; @@ -58,9 +66,7 @@ impl InitContainerBuilder { let container_dir = self.create_container_dir()?; let mut container = self.create_container_state(&container_dir)?; - container - .set_systemd(self.use_systemd) - .set_annotations(spec.annotations().clone()); + container.set_annotations(spec.annotations().clone()); let notify_path = container_dir.join(NOTIFY_FILE); // convert path of root file system of the container to absolute path @@ -81,24 +87,36 @@ impl InitContainerBuilder { let user_ns_config = UserNamespaceConfig::new(&spec)?; - let config = YoukiConfig::from_spec(&spec, container.id())?; + let mut cgroup_config = None; + if self.use_cgroups { + let linux = spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; + let cgroups_path = + utils::get_cgroup_path(linux.cgroups_path(), &self.base.container_id); + cgroup_config = Some(libcgroups::common::CgroupConfig { + cgroup_path: cgroups_path, + systemd_cgroup: self.use_systemd || user_ns_config.is_some(), + container_name: self.base.container_id.to_owned(), + }); + } + + let config = YoukiConfig::from_spec(&spec, cgroup_config.clone())?; config.save(&container_dir).map_err(|err| { tracing::error!(?container_dir, "failed to save config: {}", err); err })?; let mut builder_impl = ContainerBuilderImpl { - container_type: ContainerType::InitContainer, + container_type: ContainerType::InitContainer { + container: container.clone(), + }, syscall: self.base.syscall, - container_id: self.base.container_id, pid_file: self.base.pid_file, console_socket: csocketfd, - use_systemd: self.use_systemd, + cgroup_config, spec: Rc::new(spec), rootfs, user_ns_config, notify_path, - container: Some(container.clone()), preserve_fds: self.base.preserve_fds, detached: self.detached, executor: self.base.executor, diff --git a/crates/libcontainer/src/container/state.rs b/crates/libcontainer/src/container/state.rs index 4f0bc07aa..05701404e 100644 --- a/crates/libcontainer/src/container/state.rs +++ b/crates/libcontainer/src/container/state.rs @@ -112,8 +112,6 @@ pub struct State { // User that created the container #[serde(skip_serializing_if = "Option::is_none")] pub creator: Option, - // Specifies if systemd should be used to manage cgroups - pub use_systemd: bool, // Specifies if the Intel RDT subdirectory needs be cleaned up. pub clean_up_intel_rdt_subdirectory: Option, } @@ -136,7 +134,6 @@ impl State { annotations: Some(HashMap::default()), created: None, creator: None, - use_systemd: false, clean_up_intel_rdt_subdirectory: None, } } diff --git a/crates/libcontainer/src/container/tenant_builder.rs b/crates/libcontainer/src/container/tenant_builder.rs index 22b7eff2a..410f2a740 100644 --- a/crates/libcontainer/src/container/tenant_builder.rs +++ b/crates/libcontainer/src/container/tenant_builder.rs @@ -118,7 +118,7 @@ impl TenantContainerBuilder { // get file descriptors of console socket let csocketfd = self.setup_tty_socket(&container_dir)?; - let use_systemd = self.should_use_systemd(&container); + let cgroup_config = container.spec()?.cgroup_config; let user_ns_config = UserNamespaceConfig::new(&spec)?; let (read_end, write_end) = @@ -129,15 +129,13 @@ impl TenantContainerBuilder { exec_notify_fd: write_end.as_raw_fd(), }, syscall: self.base.syscall, - container_id: self.base.container_id, pid_file: self.base.pid_file, console_socket: csocketfd, - use_systemd, + cgroup_config, spec: Rc::new(spec), rootfs, user_ns_config, notify_path: notify_path.clone(), - container: None, preserve_fds: self.base.preserve_fds, detached: self.detached, executor: self.base.executor, @@ -491,10 +489,6 @@ impl TenantContainerBuilder { Ok(tenant_namespaces) } - fn should_use_systemd(&self, container: &Container) -> bool { - container.systemd() - } - fn setup_notify_listener(container_dir: &Path) -> Result { let notify_name = Self::generate_name(container_dir, TENANT_NOTIFY); let socket_path = container_dir.join(notify_name); diff --git a/crates/libcontainer/src/error.rs b/crates/libcontainer/src/error.rs index f68a79a16..ec07c38b4 100644 --- a/crates/libcontainer/src/error.rs +++ b/crates/libcontainer/src/error.rs @@ -14,6 +14,8 @@ pub enum MissingSpecError { pub enum LibcontainerError { #[error("failed to perform operation due to incorrect container status")] IncorrectStatus, + #[error("requested operation requires cgroups to be enabled on the container")] + CgroupsMissing, #[error("container already exists")] Exist, #[error("container state directory does not exist")] diff --git a/crates/libcontainer/src/hooks.rs b/crates/libcontainer/src/hooks.rs index 8233e2ab8..5423d65b3 100644 --- a/crates/libcontainer/src/hooks.rs +++ b/crates/libcontainer/src/hooks.rs @@ -23,8 +23,6 @@ pub enum HookError { Killed, #[error("failed to execute hook command due to a timeout")] Timeout, - #[error("container state is required to run hook")] - MissingContainerState, #[error("failed to write container state to stdin")] WriteContainerState(#[source] std::io::Error), } @@ -33,10 +31,10 @@ type Result = std::result::Result; pub fn run_hooks( hooks: Option<&Vec>, - container: Option<&Container>, + container: &Container, cwd: Option<&Path>, ) -> Result<()> { - let state = &(container.ok_or(HookError::MissingContainerState)?.state); + let state = &container.state; if let Some(hooks) = hooks { for hook in hooks { @@ -176,7 +174,7 @@ mod test { fn test_run_hook() -> Result<()> { { let default_container: Container = Default::default(); - run_hooks(None, Some(&default_container), None).context("Failed simple test")?; + run_hooks(None, &default_container, None).context("Failed simple test")?; } { @@ -185,7 +183,7 @@ mod test { let hook = HookBuilder::default().path("true").build()?; let hooks = Some(vec![hook]); - run_hooks(hooks.as_ref(), Some(&default_container), None).context("Failed true")?; + run_hooks(hooks.as_ref(), &default_container, None).context("Failed true")?; } { @@ -205,8 +203,7 @@ mod test { .env(vec![String::from("key=value")]) .build()?; let hooks = Some(vec![hook]); - run_hooks(hooks.as_ref(), Some(&default_container), None) - .context("Failed printenv test")?; + run_hooks(hooks.as_ref(), &default_container, None).context("Failed printenv test")?; } { @@ -224,7 +221,7 @@ mod test { ]) .build()?; let hooks = Some(vec![hook]); - run_hooks(hooks.as_ref(), Some(&default_container), Some(tmp.path())) + run_hooks(hooks.as_ref(), &default_container, Some(tmp.path())) .context("Failed pwd test")?; } @@ -248,7 +245,7 @@ mod test { .timeout(1) .build()?; let hooks = Some(vec![hook]); - match run_hooks(hooks.as_ref(), Some(&default_container), None) { + match run_hooks(hooks.as_ref(), &default_container, None) { Ok(_) => { bail!("The test expects the hook to error out with timeout. Should not execute cleanly"); } diff --git a/crates/libcontainer/src/process/args.rs b/crates/libcontainer/src/process/args.rs index 2ea0dc974..0f11a8f87 100644 --- a/crates/libcontainer/src/process/args.rs +++ b/crates/libcontainer/src/process/args.rs @@ -10,9 +10,10 @@ use crate::notify_socket::NotifyListener; use crate::syscall::syscall::SyscallType; use crate::user_ns::UserNamespaceConfig; use crate::workload::Executor; -#[derive(Debug, Copy, Clone)] + +#[derive(Debug, Clone)] pub enum ContainerType { - InitContainer, + InitContainer { container: Container }, TenantContainer { exec_notify_fd: RawFd }, } @@ -32,12 +33,10 @@ pub struct ContainerArgs { pub notify_listener: NotifyListener, /// File descriptors preserved/passed to the container init process. pub preserve_fds: i32, - /// Container state - pub container: Option, /// Options for new namespace creation pub user_ns_config: Option, /// Cgroup Manager Config - pub cgroup_config: CgroupConfig, + pub cgroup_config: Option, /// If the container is to be run in detached mode pub detached: bool, /// Manage the functions that actually run on the container diff --git a/crates/libcontainer/src/process/container_init_process.rs b/crates/libcontainer/src/process/container_init_process.rs index 0d16e6094..a31e8df64 100644 --- a/crates/libcontainer/src/process/container_init_process.rs +++ b/crates/libcontainer/src/process/container_init_process.rs @@ -355,7 +355,6 @@ pub fn container_init_process( utils::parse_env(proc.env().as_ref().unwrap_or(&vec![])); let rootfs_path = &args.rootfs; let hooks = spec.hooks().as_ref(); - let container = args.container.as_ref(); let namespaces = Namespaces::try_from(linux.namespaces().as_ref())?; let notify_listener = &args.notify_listener; @@ -395,7 +394,7 @@ pub fn container_init_process( let _ = prctl::set_no_new_privileges(true); } - if matches!(args.container_type, ContainerType::InitContainer) { + if let ContainerType::InitContainer { container } = &args.container_type { // create_container hook needs to be called after the namespace setup, but // before pivot_root is called. This runs in the container namespaces. if let Some(hooks) = hooks { @@ -691,7 +690,7 @@ pub fn container_init_process( // create_container hook needs to be called after the namespace setup, but // before pivot_root is called. This runs in the container namespaces. - if matches!(args.container_type, ContainerType::InitContainer) { + if let ContainerType::InitContainer { container } = &args.container_type { if let Some(hooks) = hooks { hooks::run_hooks(hooks.start_container().as_ref(), container, None).map_err(|err| { tracing::error!(?err, "failed to run start container hooks"); diff --git a/crates/libcontainer/src/process/container_intermediate_process.rs b/crates/libcontainer/src/process/container_intermediate_process.rs index 1bee6f3ee..ae245e1a6 100644 --- a/crates/libcontainer/src/process/container_intermediate_process.rs +++ b/crates/libcontainer/src/process/container_intermediate_process.rs @@ -49,8 +49,6 @@ pub fn container_intermediate_process( let spec = &args.spec; let linux = spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; let namespaces = Namespaces::try_from(linux.namespaces().as_ref())?; - let cgroup_manager = libcgroups::common::create_cgroup_manager(args.cgroup_config.to_owned()) - .map_err(|e| IntermediateProcessError::Cgroup(e.to_string()))?; // this needs to be done before we create the init process, so that the init // process will already be captured by the cgroup. It also needs to be done @@ -62,11 +60,15 @@ pub fn container_intermediate_process( // In addition this needs to be done before we enter the cgroup namespace as // the cgroup of the process will form the root of the cgroup hierarchy in // the cgroup namespace. - apply_cgroups( - &cgroup_manager, - linux.resources().as_ref(), - matches!(args.container_type, ContainerType::InitContainer), - )?; + if let Some(cgroup_config) = &args.cgroup_config { + let cgroup_manager = libcgroups::common::create_cgroup_manager(cgroup_config.to_owned()) + .map_err(|e| IntermediateProcessError::Cgroup(e.to_string()))?; + apply_cgroups( + &cgroup_manager, + linux.resources().as_ref(), + matches!(args.container_type, ContainerType::InitContainer { .. }), + )?; + } // if new user is specified in specification, this will be true and new // namespace will be created, check diff --git a/crates/libcontainer/src/process/container_main_process.rs b/crates/libcontainer/src/process/container_main_process.rs index fd5dfcb1c..6302aa1c5 100644 --- a/crates/libcontainer/src/process/container_main_process.rs +++ b/crates/libcontainer/src/process/container_main_process.rs @@ -1,7 +1,7 @@ use nix::sys::wait::{waitpid, WaitStatus}; use nix::unistd::Pid; -use crate::process::args::ContainerArgs; +use crate::process::args::{ContainerArgs, ContainerType}; use crate::process::fork::{self, CloneCb}; use crate::process::intel_rdt::setup_intel_rdt; use crate::process::{channel, container_intermediate_process}; @@ -126,18 +126,19 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, bo if let Some(linux) = container_args.spec.linux() { #[cfg(feature = "libseccomp")] if let Some(seccomp) = linux.seccomp() { + let container_state = match &container_args.container_type { + ContainerType::InitContainer { container } => &container.state, + ContainerType::TenantContainer { .. } => { + return Err(ProcessError::ContainerStateRequired) + } + }; let state = crate::container::ContainerProcessState { oci_version: container_args.spec.version().to_string(), // runc hardcode the `seccompFd` name for fds. fds: vec![String::from("seccompFd")], pid: init_pid.as_raw(), metadata: seccomp.listener_metadata().to_owned().unwrap_or_default(), - state: container_args - .container - .as_ref() - .ok_or(ProcessError::ContainerStateRequired)? - .state - .clone(), + state: container_state.clone(), }; crate::process::seccomp_listener::sync_seccomp( seccomp, @@ -148,10 +149,10 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, bo } if let Some(intel_rdt) = linux.intel_rdt() { - let container_id = container_args - .container - .as_ref() - .map(|container| container.id()); + let container_id = match &container_args.container_type { + ContainerType::InitContainer { container } => Some(container.id()), + ContainerType::TenantContainer { .. } => None, + }; need_to_clean_up_intel_rdt_subdirectory = setup_intel_rdt(container_id, &init_pid, intel_rdt)?; } diff --git a/crates/liboci-cli/src/lib.rs b/crates/liboci-cli/src/lib.rs index 5e5b2eb9a..b4c30e19c 100644 --- a/crates/liboci-cli/src/lib.rs +++ b/crates/liboci-cli/src/lib.rs @@ -94,4 +94,7 @@ pub struct GlobalOpts { /// Enable systemd cgroup manager, rather then use the cgroupfs directly. #[clap(short, long)] pub systemd_cgroup: bool, + /// Entirely disable cgroup manipulation (breaks some operations, e.g. pause) + #[clap(short, long)] + pub disable_cgroups: bool, } diff --git a/crates/youki/src/commands/create.rs b/crates/youki/src/commands/create.rs index 41eda151a..f377895b2 100644 --- a/crates/youki/src/commands/create.rs +++ b/crates/youki/src/commands/create.rs @@ -13,7 +13,12 @@ use crate::workload::executor::default_executor; // can be given impression that is is running on a complete system, but on the system which // it is running, it is just another process, and has attributes such as pid, file descriptors, etc. // associated with it like any other process. -pub fn create(args: Create, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> { +pub fn create( + args: Create, + root_path: PathBuf, + systemd_cgroup: bool, + use_cgroups: bool, +) -> Result<()> { ContainerBuilder::new(args.container_id.clone(), SyscallType::default()) .with_executor(default_executor()) .with_pid_file(args.pid_file.as_ref())? @@ -23,6 +28,7 @@ pub fn create(args: Create, root_path: PathBuf, systemd_cgroup: bool) -> Result< .validate_id()? .as_init(&args.bundle) .with_systemd(systemd_cgroup) + .with_cgroups(use_cgroups) .with_detach(true) .with_no_pivot(args.no_pivot) .build()?; diff --git a/crates/youki/src/commands/mod.rs b/crates/youki/src/commands/mod.rs index 08044b7fb..10f476aa4 100644 --- a/crates/youki/src/commands/mod.rs +++ b/crates/youki/src/commands/mod.rs @@ -57,11 +57,8 @@ fn create_cgroup_manager>( container_id: &str, ) -> Result { let container = load_container(root_path, container_id)?; - Ok(libcgroups::common::create_cgroup_manager( - libcgroups::common::CgroupConfig { - cgroup_path: container.spec()?.cgroup_path, - systemd_cgroup: container.systemd(), - container_name: container.id().to_string(), - }, - )?) + match container.spec()?.cgroup_config { + Some(cc) => Ok(libcgroups::common::create_cgroup_manager(cc)?), + None => bail!("cannot use cgroups on container started without them"), + } } diff --git a/crates/youki/src/commands/run.rs b/crates/youki/src/commands/run.rs index 2f6d1812e..e08c16435 100644 --- a/crates/youki/src/commands/run.rs +++ b/crates/youki/src/commands/run.rs @@ -11,7 +11,7 @@ use nix::unistd::Pid; use crate::workload::executor::default_executor; -pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result { +pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool, use_cgroups: bool) -> Result { let mut container = ContainerBuilder::new(args.container_id.clone(), SyscallType::default()) .with_executor(default_executor()) .with_pid_file(args.pid_file.as_ref())? @@ -21,6 +21,7 @@ pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result { .validate_id()? .as_init(&args.bundle) .with_systemd(systemd_cgroup) + .with_cgroups(use_cgroups) .with_detach(args.detach) .with_no_pivot(args.no_pivot) .build()?; diff --git a/crates/youki/src/main.rs b/crates/youki/src/main.rs index 27a515b2c..40eac1d2b 100644 --- a/crates/youki/src/main.rs +++ b/crates/youki/src/main.rs @@ -101,11 +101,12 @@ fn main() -> Result<()> { ); let root_path = rootpath::determine(opts.global.root)?; let systemd_cgroup = opts.global.systemd_cgroup; + let use_cgroups = !opts.global.disable_cgroups; let cmd_result = match opts.subcmd { SubCommand::Standard(cmd) => match *cmd { StandardCmd::Create(create) => { - commands::create::create(create, root_path, systemd_cgroup) + commands::create::create(create, root_path, systemd_cgroup, use_cgroups) } StandardCmd::Start(start) => commands::start::start(start, root_path), StandardCmd::Kill(kill) => commands::kill::kill(kill, root_path), @@ -130,14 +131,16 @@ fn main() -> Result<()> { CommonCmd::Pause(pause) => commands::pause::pause(pause, root_path), CommonCmd::Ps(ps) => commands::ps::ps(ps, root_path), CommonCmd::Resume(resume) => commands::resume::resume(resume, root_path), - CommonCmd::Run(run) => match commands::run::run(run, root_path, systemd_cgroup) { - Ok(exit_code) => std::process::exit(exit_code), - Err(e) => { - tracing::error!("error in executing command: {:?}", e); - eprintln!("run failed : {e}"); - std::process::exit(-1); + CommonCmd::Run(run) => { + match commands::run::run(run, root_path, systemd_cgroup, use_cgroups) { + Ok(exit_code) => std::process::exit(exit_code), + Err(e) => { + tracing::error!("error in executing command: {:?}", e); + eprintln!("run failed : {e}"); + std::process::exit(-1); + } } - }, + } CommonCmd::Spec(spec) => commands::spec_json::spec(spec), CommonCmd::Update(update) => commands::update::update(update, root_path), },