diff --git a/CHANGELOG.md b/CHANGELOG.md index 122f3dcf1a8..c52d3d77a21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,43 @@ This project uses [*towncrier*](https://towncrier.readthedocs.io/) and the chang +## [3.128.0](https://github.com/metalbear-co/mirrord/tree/3.128.0) - 2024-12-19 + + +### Added + +- Added to mirrord config a new experimental field + `.experimental.readonly_file_buffer`. If set to a value greater than 0, + mirrord will fetch remote readonly files in chunks of at least this size (in bytes). + This is to improve performance with applications that make many small reads + from remote files. + [#2069](https://github.com/metalbear-co/mirrord/issues/2069) +- Added `mirrord container-ext` command that should be used by extension and + works similarly to `mirrord ext` but for containers. +- Added runAsNonRoot and RO file system to operator deployment +- Added custom resource definition for cluster-wide mirrord policy - + `MirrordClusterPolicy`. +- Added mapping option for env vars config, allowing the user to map multiple env + vars to another value based on regexes. + [#2920](https://github.com/metalbear-co/mirrord/issues/2920) +- Added mkdir support + [#2221](https://github.com/metalbear-co/mirrord/issues/2221) + + +### Fixed + +- Added debugger port detection type for the node `--inspect`, `--inspect-wait` + and `--inspect-brk` flags. + [#2936](https://github.com/metalbear-co/mirrord/issues/2936) +- Fixed `mirrord operator setup` - added missing `/tmp` volume to the operator + deployment. + + +### Internal + +- Added E2E test for `MirrordClusterPolicy` that blocks incoming traffic + mirroring. + ## [3.127.0](https://github.com/metalbear-co/mirrord/tree/3.127.0) - 2024-12-10 diff --git a/Cargo.lock b/Cargo.lock index bc8172f72af..207675b71f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2441,7 +2441,7 @@ dependencies = [ [[package]] name = "fileops" -version = "3.127.0" +version = "3.128.0" dependencies = [ "libc", ] @@ -3567,7 +3567,7 @@ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "issue1317" -version = "3.127.0" +version = "3.128.0" dependencies = [ "actix-web", "env_logger 0.11.5", @@ -3577,7 +3577,7 @@ dependencies = [ [[package]] name = "issue1776" -version = "3.127.0" +version = "3.128.0" dependencies = [ "errno 0.3.10", "libc", @@ -3586,7 +3586,7 @@ dependencies = [ [[package]] name = "issue1776portnot53" -version = "3.127.0" +version = "3.128.0" dependencies = [ "libc", "socket2", @@ -3594,14 +3594,14 @@ dependencies = [ [[package]] name = "issue1899" -version = "3.127.0" +version = "3.128.0" dependencies = [ "libc", ] [[package]] name = "issue2001" -version = "3.127.0" +version = "3.128.0" dependencies = [ "libc", ] @@ -3950,7 +3950,7 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "listen_ports" -version = "3.127.0" +version = "3.128.0" [[package]] name = "litemap" @@ -4178,7 +4178,7 @@ dependencies = [ [[package]] name = "mirrord" -version = "3.127.0" +version = "3.128.0" dependencies = [ "actix-codec", "clap", @@ -4238,7 +4238,7 @@ dependencies = [ [[package]] name = "mirrord-agent" -version = "3.127.0" +version = "3.128.0" dependencies = [ "actix-codec", "async-trait", @@ -4298,7 +4298,7 @@ dependencies = [ [[package]] name = "mirrord-analytics" -version = "3.127.0" +version = "3.128.0" dependencies = [ "assert-json-diff", "base64 0.22.1", @@ -4312,7 +4312,7 @@ dependencies = [ [[package]] name = "mirrord-auth" -version = "3.127.0" +version = "3.128.0" dependencies = [ "bcder", "chrono", @@ -4333,7 +4333,7 @@ dependencies = [ [[package]] name = "mirrord-config" -version = "3.127.0" +version = "3.128.0" dependencies = [ "bimap", "bitflags 2.6.0", @@ -4356,7 +4356,7 @@ dependencies = [ [[package]] name = "mirrord-config-derive" -version = "3.127.0" +version = "3.128.0" dependencies = [ "proc-macro2", "proc-macro2-diagnostics", @@ -4366,7 +4366,7 @@ dependencies = [ [[package]] name = "mirrord-console" -version = "3.127.0" +version = "3.128.0" dependencies = [ "bincode", "drain", @@ -4382,7 +4382,7 @@ dependencies = [ [[package]] name = "mirrord-intproxy" -version = "3.127.0" +version = "3.128.0" dependencies = [ "bytes", "exponential-backoff", @@ -4399,6 +4399,7 @@ dependencies = [ "mirrord-protocol", "rand", "reqwest 0.12.9", + "rstest", "rustls 0.23.19", "rustls-pemfile 2.2.0", "semver 1.0.23", @@ -4412,7 +4413,7 @@ dependencies = [ [[package]] name = "mirrord-intproxy-protocol" -version = "3.127.0" +version = "3.128.0" dependencies = [ "bincode", "mirrord-protocol", @@ -4422,7 +4423,7 @@ dependencies = [ [[package]] name = "mirrord-kube" -version = "3.127.0" +version = "3.128.0" dependencies = [ "actix-codec", "async-stream", @@ -4452,7 +4453,7 @@ dependencies = [ [[package]] name = "mirrord-layer" -version = "3.127.0" +version = "3.128.0" dependencies = [ "actix-codec", "base64 0.22.1", @@ -4502,7 +4503,7 @@ dependencies = [ [[package]] name = "mirrord-layer-macro" -version = "3.127.0" +version = "3.128.0" dependencies = [ "proc-macro2", "quote", @@ -4511,7 +4512,7 @@ dependencies = [ [[package]] name = "mirrord-macros" -version = "3.127.0" +version = "3.128.0" dependencies = [ "proc-macro2", "proc-macro2-diagnostics", @@ -4521,7 +4522,7 @@ dependencies = [ [[package]] name = "mirrord-operator" -version = "3.127.0" +version = "3.128.0" dependencies = [ "base64 0.22.1", "bincode", @@ -4555,7 +4556,7 @@ dependencies = [ [[package]] name = "mirrord-progress" -version = "3.127.0" +version = "3.128.0" dependencies = [ "enum_dispatch", "indicatif", @@ -4565,7 +4566,7 @@ dependencies = [ [[package]] name = "mirrord-protocol" -version = "1.12.0" +version = "1.13.0" dependencies = [ "actix-codec", "bincode", @@ -4590,7 +4591,7 @@ dependencies = [ [[package]] name = "mirrord-sip" -version = "3.127.0" +version = "3.128.0" dependencies = [ "apple-codesign", "object 0.36.5", @@ -4603,7 +4604,7 @@ dependencies = [ [[package]] name = "mirrord-vpn" -version = "3.127.0" +version = "3.128.0" dependencies = [ "futures", "ipnet", @@ -4953,7 +4954,7 @@ dependencies = [ [[package]] name = "outgoing" -version = "3.127.0" +version = "3.128.0" [[package]] name = "outref" @@ -6084,14 +6085,14 @@ dependencies = [ [[package]] name = "rust-bypassed-unix-socket" -version = "3.127.0" +version = "3.128.0" dependencies = [ "tokio", ] [[package]] name = "rust-e2e-fileops" -version = "3.127.0" +version = "3.128.0" dependencies = [ "libc", ] @@ -6107,7 +6108,7 @@ dependencies = [ [[package]] name = "rust-unix-socket-client" -version = "3.127.0" +version = "3.128.0" dependencies = [ "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 11f578c8c28..da8a4e788d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ resolver = "2" # latest commits on rustls suppress certificate verification [workspace.package] -version = "3.127.0" +version = "3.128.0" edition = "2021" license = "MIT" readme = "README.md" diff --git a/changelog.d/+cluster-policy.added.md b/changelog.d/+cluster-policy.added.md deleted file mode 100644 index 2725780086b..00000000000 --- a/changelog.d/+cluster-policy.added.md +++ /dev/null @@ -1 +0,0 @@ -Added custom resource definition for cluster-wide mirrord policy - `MirrordClusterPolicy`. diff --git a/changelog.d/+operator-deployment-strict.added.md b/changelog.d/+operator-deployment-strict.added.md deleted file mode 100644 index a72b1ccd58a..00000000000 --- a/changelog.d/+operator-deployment-strict.added.md +++ /dev/null @@ -1 +0,0 @@ -Add runAsNonRoot and RO file system to operator deployment diff --git a/changelog.d/+operator-tmp-volume.fixed.md b/changelog.d/+operator-tmp-volume.fixed.md deleted file mode 100644 index db28de877e5..00000000000 --- a/changelog.d/+operator-tmp-volume.fixed.md +++ /dev/null @@ -1 +0,0 @@ -Fixed `mirrord operator setup` - added missing `/tmp` volume to the operator deployment. diff --git a/changelog.d/2920.changed.md b/changelog.d/2920.changed.md deleted file mode 100644 index b03aff8d438..00000000000 --- a/changelog.d/2920.changed.md +++ /dev/null @@ -1 +0,0 @@ -Add mapping option for env vars config, allowing the user to map multiple env vars to another value based on regexes. diff --git a/mirrord-schema.json b/mirrord-schema.json index 63886909f4e..ec3d68d4766 100644 --- a/mirrord-schema.json +++ b/mirrord-schema.json @@ -575,6 +575,14 @@ "string", "null" ] + }, + "cli_prevent_cleanup": { + "title": "container.cli_prevent_cleanup {#container-cli_extra_args}", + "description": "Don't add `--rm` to sidecar command to prevent cleanup.", + "type": [ + "boolean", + "null" + ] } }, "additionalProperties": false @@ -815,6 +823,16 @@ "null" ] }, + "readonly_file_buffer": { + "title": "_experimental_ readonly_file_buffer {#experimental-readonly_file_buffer}", + "description": "Sets buffer size for readonly remote files (in bytes, for example 4096). If set, such files will be read in chunks and buffered locally. This improves performace when the user application reads data in small portions.\n\nSetting to 0 disables file buffering.\n\n", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, "tcp_ping4_mock": { "title": "_experimental_ tcp_ping4_mock {#experimental-tcp_ping4_mock}", "description": "", diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 0808cc1d382..f2ffdc37d68 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -243,6 +243,14 @@ impl FileManager { }) => Some(FileResponse::GetDEnts64( self.getdents64(remote_fd, buffer_size), )), + FileRequest::MakeDir(MakeDirRequest { pathname, mode }) => { + Some(FileResponse::MakeDir(self.mkdir(&pathname, mode))) + } + FileRequest::MakeDirAt(MakeDirAtRequest { + dirfd, + pathname, + mode, + }) => Some(FileResponse::MakeDir(self.mkdirat(dirfd, &pathname, mode))), }) } @@ -486,6 +494,46 @@ impl FileManager { }) } + pub(crate) fn mkdir(&mut self, path: &Path, mode: u32) -> RemoteResult<()> { + trace!("FileManager::mkdir -> path {:#?} | mode {:#?}", path, mode); + + let path = resolve_path(path, &self.root_path)?; + + match nix::unistd::mkdir(&path, nix::sys::stat::Mode::from_bits_truncate(mode)) { + Ok(_) => Ok(()), + Err(err) => Err(ResponseError::from(std::io::Error::from_raw_os_error( + err as i32, + ))), + } + } + + pub(crate) fn mkdirat(&mut self, dirfd: u64, path: &Path, mode: u32) -> RemoteResult<()> { + trace!( + "FileManager::mkdirat -> dirfd {:#?} | path {:#?} | mode {:#?}", + dirfd, + path, + mode + ); + + let relative_dir = self + .open_files + .get(&dirfd) + .ok_or(ResponseError::NotFound(dirfd))?; + + if let RemoteFile::Directory(relative_dir) = relative_dir { + let path = relative_dir.join(path); + + match nix::unistd::mkdir(&path, nix::sys::stat::Mode::from_bits_truncate(mode)) { + Ok(_) => Ok(()), + Err(err) => Err(ResponseError::from(std::io::Error::from_raw_os_error( + err as i32, + ))), + } + } else { + Err(ResponseError::NotDirectory(dirfd)) + } + } + pub(crate) fn seek(&mut self, fd: u64, seek_from: SeekFrom) -> RemoteResult { trace!( "FileManager::seek -> fd {:#?} | seek_from {:#?}", diff --git a/mirrord/analytics/src/lib.rs b/mirrord/analytics/src/lib.rs index 4ca01f2b6c6..bb05773544a 100644 --- a/mirrord/analytics/src/lib.rs +++ b/mirrord/analytics/src/lib.rs @@ -145,6 +145,12 @@ impl From for AnalyticValue { } } +impl From for AnalyticValue { + fn from(n: u64) -> Self { + AnalyticValue::Number(u32::try_from(n).unwrap_or(u32::MAX)) + } +} + impl From for AnalyticValue { fn from(n: usize) -> Self { AnalyticValue::Number(u32::try_from(n).unwrap_or(u32::MAX)) diff --git a/mirrord/cli/src/config.rs b/mirrord/cli/src/config.rs index 57572ace947..1570cfbca35 100644 --- a/mirrord/cli/src/config.rs +++ b/mirrord/cli/src/config.rs @@ -59,6 +59,10 @@ pub(super) enum Commands { #[command(hide = true, name = "ls")] ListTargets(Box), + /// mirrord container extension integration. + #[command(hide = true, name = "container-ext")] + ExtensionContainer(Box), + /// Extension execution - used by extension to execute binaries. #[command(hide = true, name = "ext")] ExtensionExec(Box), @@ -120,8 +124,8 @@ impl core::fmt::Display for FsMode { } } -#[derive(Args, Debug)] /// Parameters to override any values from mirrord-config as part of `exec` or `container` commands. +#[derive(Args, Debug)] pub(super) struct ExecParams { /// Parameters for the target #[clap(flatten)] @@ -337,10 +341,11 @@ impl ExecParams { } } +// `mirrord exec` command #[derive(Args, Debug)] pub(super) struct ExecArgs { #[clap(flatten)] - pub params: ExecParams, + pub params: Box, /// Binary to execute and connect with the remote pod. pub binary: String, @@ -759,7 +764,7 @@ pub(super) enum DiagnoseCommand { }, } -#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, ValueEnum, serde::Serialize)] /// Runtimes supported by the `mirrord container` command. pub(super) enum ContainerRuntime { Docker, @@ -777,12 +782,13 @@ impl std::fmt::Display for ContainerRuntime { } } +// `mirrord container` command #[derive(Args, Debug)] -/// Args for the `mirrord container` command. +#[clap(args_conflicts_with_subcommands = true)] pub(super) struct ContainerArgs { - #[clap(flatten)] /// Parameters to be passed to mirrord. - pub params: ExecParams, + #[clap(flatten)] + pub params: Box, /// Container command to be executed #[arg(trailing_var_arg = true)] @@ -790,16 +796,30 @@ pub(super) struct ContainerArgs { } impl ContainerArgs { + /// Unpack exec command to inner components [`RuntimeArgs`] and [`ExecParams`] + /// (need to parse [`RuntimeArgs`] here just to make clap happy with nested trailing_var_arg) pub fn into_parts(self) -> (RuntimeArgs, ExecParams) { let ContainerArgs { params, exec } = self; - let runtime_args = - RuntimeArgs::parse_from(std::iter::once("mirrord container --".into()).chain(exec)); + let runtime_args = RuntimeArgs::parse_from( + std::iter::once("mirrord container exec --".into()).chain(exec), + ); - (runtime_args, params) + (runtime_args, *params) } } +#[derive(Args, Debug)] +pub struct ExtensionContainerArgs { + /// Specify config file to use + #[arg(short = 'f', long, value_hint = ValueHint::FilePath)] + pub config_file: Option, + + /// Specify target + #[arg(short = 't')] + pub target: Option, +} + #[derive(Parser, Debug)] pub struct RuntimeArgs { /// Which kind of container runtime to use. @@ -808,12 +828,12 @@ pub struct RuntimeArgs { #[command(subcommand)] /// Command to use with `mirrord container`. - pub command: ContainerCommand, + pub command: ContainerRuntimeCommand, } /// Supported command for using mirrord with container runtimes. #[derive(Subcommand, Debug, Clone)] -pub(super) enum ContainerCommand { +pub(super) enum ContainerRuntimeCommand { /// Execute a ` run` command with mirrord loaded. Run { /// Arguments that will be propogated to underlying ` run` command. @@ -822,15 +842,15 @@ pub(super) enum ContainerCommand { }, } -impl ContainerCommand { +impl ContainerRuntimeCommand { pub fn run>(runtime_args: impl IntoIterator) -> Self { - ContainerCommand::Run { + ContainerRuntimeCommand::Run { runtime_args: runtime_args.into_iter().map(T::into).collect(), } } pub fn has_publish(&self) -> bool { - let ContainerCommand::Run { runtime_args } = self; + let ContainerRuntimeCommand::Run { runtime_args } = self; let mut hit_trailing_token = false; @@ -919,15 +939,15 @@ mod tests { let command = "mirrord container -t deploy/test podman run -it --rm debian"; let result = Cli::parse_from(command.split(' ')); - let Commands::Container(continaer) = result.commands else { + let Commands::Container(container) = result.commands else { panic!("cli command didn't parse into container command, got: {result:#?}") }; - let (runtime_args, _) = continaer.into_parts(); + let (runtime_args, _) = container.into_parts(); assert_eq!(runtime_args.runtime, ContainerRuntime::Podman); - let ContainerCommand::Run { runtime_args } = runtime_args.command; + let ContainerRuntimeCommand::Run { runtime_args } = runtime_args.command; assert_eq!(runtime_args, vec!["-it", "--rm", "debian"]); } @@ -937,15 +957,15 @@ mod tests { let command = "mirrord container -t deploy/test -- podman run -it --rm debian"; let result = Cli::parse_from(command.split(' ')); - let Commands::Container(continaer) = result.commands else { + let Commands::Container(container) = result.commands else { panic!("cli command didn't parse into container command, got: {result:#?}") }; - let (runtime_args, _) = continaer.into_parts(); + let (runtime_args, _) = container.into_parts(); assert_eq!(runtime_args.runtime, ContainerRuntime::Podman); - let ContainerCommand::Run { runtime_args } = runtime_args.command; + let ContainerRuntimeCommand::Run { runtime_args } = runtime_args.command; assert_eq!(runtime_args, vec!["-it", "--rm", "debian"]); } diff --git a/mirrord/cli/src/container.rs b/mirrord/cli/src/container.rs index 6555111e751..afffad9831d 100644 --- a/mirrord/cli/src/container.rs +++ b/mirrord/cli/src/container.rs @@ -1,9 +1,16 @@ -use std::{io::Write, net::SocketAddr, path::Path, process::Stdio, time::Duration}; +use std::{ + collections::HashMap, + io::Write, + net::SocketAddr, + ops::Not, + path::{Path, PathBuf}, + process::Stdio, + time::Duration, +}; +use clap::ValueEnum; use local_ip_address::local_ip; -use mirrord_analytics::{ - AnalyticsError, AnalyticsReporter, CollectAnalytics, ExecutionKind, Reporter, -}; +use mirrord_analytics::{AnalyticsError, AnalyticsReporter, ExecutionKind, Reporter}; use mirrord_config::{ external_proxy::{MIRRORD_EXTERNAL_TLS_CERTIFICATE_ENV, MIRRORD_EXTERNAL_TLS_KEY_ENV}, internal_proxy::{ @@ -12,7 +19,7 @@ use mirrord_config::{ }, LayerConfig, MIRRORD_CONFIG_FILE_ENV, }; -use mirrord_progress::{Progress, ProgressTracker, MIRRORD_PROGRESS_ENV}; +use mirrord_progress::{JsonProgress, Progress, ProgressTracker, MIRRORD_PROGRESS_ENV}; use tempfile::NamedTempFile; use tokio::{ io::{AsyncBufReadExt, AsyncReadExt, BufReader}, @@ -21,10 +28,10 @@ use tokio::{ use tracing::Level; use crate::{ - config::{ContainerCommand, ContainerRuntime, ExecParams, RuntimeArgs}, + config::{ContainerRuntime, ContainerRuntimeCommand, ExecParams, RuntimeArgs}, connection::AGENT_CONNECT_INFO_ENV_KEY, container::command_builder::RuntimeCommandBuilder, - error::{CliResult, ContainerError}, + error::{CliError, CliResult, ContainerError}, execution::{ MirrordExecution, LINUX_INJECTION_ENV_VAR, MIRRORD_CONNECT_TCP_ENV, MIRRORD_EXECUTION_KIND_ENV, @@ -50,9 +57,7 @@ fn format_command(command: &Command) -> String { /// Execute a [`Command`] and read first line from stdout #[tracing::instrument(level = Level::TRACE, ret)] -async fn exec_and_get_first_line( - command: &mut Command, -) -> CliResult, ContainerError> { +async fn exec_and_get_first_line(command: &mut Command) -> Result, ContainerError> { let mut child = command .stdin(Stdio::null()) .stdout(Stdio::piped()) @@ -111,7 +116,7 @@ async fn exec_and_get_first_line( /// Create a temp file with a json serialized [`LayerConfig`] to be loaded by container and external /// proxy #[tracing::instrument(level = Level::TRACE, ret)] -fn create_composed_config(config: &LayerConfig) -> CliResult { +fn create_composed_config(config: &LayerConfig) -> Result { let mut composed_config_file = tempfile::Builder::new() .suffix(".json") .tempfile() @@ -127,7 +132,7 @@ fn create_composed_config(config: &LayerConfig) -> CliResult, -) -> CliResult<(NamedTempFile, NamedTempFile), ContainerError> { +) -> Result<(NamedTempFile, NamedTempFile), ContainerError> { let geerated = rcgen::generate_simple_self_signed(subject_alt_names) .map_err(ContainerError::SelfSignedCertificate)?; @@ -154,25 +159,22 @@ async fn create_sidecar_intproxy( config: &LayerConfig, base_command: &RuntimeCommandBuilder, connection_info: Vec<(&str, &str)>, -) -> CliResult<(String, SocketAddr), ContainerError> { +) -> Result<(String, SocketAddr), ContainerError> { let mut sidecar_command = base_command.clone(); sidecar_command.add_env(MIRRORD_INTPROXY_CONTAINER_MODE_ENV, "true"); sidecar_command.add_envs(connection_info); - let sidecar_container_command = ContainerCommand::run( + let cleanup = config.container.cli_prevent_cleanup.not().then_some("--rm"); + + let sidecar_container_command = ContainerRuntimeCommand::run( config .container .cli_extra_args .iter() .map(String::as_str) - .chain([ - "--rm", - "-d", - &config.container.cli_image, - "mirrord", - "intproxy", - ]), + .chain(cleanup) + .chain(["-d", &config.container.cli_image, "mirrord", "intproxy"]), ); let (runtime_binary, sidecar_args) = sidecar_command @@ -258,42 +260,12 @@ async fn create_sidecar_intproxy( Ok((sidecar_container_id, intproxy_address)) } -/// Main entry point for the `mirrord container` command. -/// This spawns: "agent" - "external proxy" - "intproxy sidecar" - "execution container" -pub(crate) async fn container_command( - runtime_args: RuntimeArgs, - exec_params: ExecParams, - watch: drain::Watch, -) -> CliResult { - let mut progress = ProgressTracker::from_env("mirrord container"); - - if runtime_args.command.has_publish() { - progress.warning("mirrord container may have problems with \"-p\" directly container in command, please add to \"contanier.cli_extra_args\" in config if you are planning to publish ports"); - } - - progress.warning("mirrord container is currently an unstable feature"); - - for (name, value) in exec_params.as_env_vars()? { - std::env::set_var(name, value); - } - - std::env::set_var( - MIRRORD_EXECUTION_KIND_ENV, - (CONTAINER_EXECUTION_KIND as u32).to_string(), - ); - - let (mut config, mut context) = LayerConfig::from_env_with_warnings()?; - - let mut analytics = - AnalyticsReporter::only_error(config.telemetry, CONTAINER_EXECUTION_KIND, watch); - (&config).collect_analytics(analytics.get_mut()); - - config.verify(&mut context)?; - for warning in context.get_warnings() { - progress.warning(warning); - } +type TlsGuard = (NamedTempFile, NamedTempFile); - let _internal_proxy_tls_guards = if config.external_proxy.tls_enable +fn prepare_tls_certs_for_container( + config: &mut LayerConfig, +) -> CliResult<(Option, Option)> { + let internal_proxy_tls_guards = if config.external_proxy.tls_enable && (config.internal_proxy.client_tls_certificate.is_none() || config.internal_proxy.client_tls_key.is_none()) { @@ -314,7 +286,7 @@ pub(crate) async fn container_command( None }; - let _external_proxy_tls_guards = if config.external_proxy.tls_enable + let external_proxy_tls_guards = if config.external_proxy.tls_enable && (config.external_proxy.tls_certificate.is_none() || config.external_proxy.tls_key.is_none()) { @@ -340,6 +312,47 @@ pub(crate) async fn container_command( None }; + Ok((internal_proxy_tls_guards, external_proxy_tls_guards)) +} + +/// Main entry point for the `mirrord container` command. +/// This spawns: "agent" - "external proxy" - "intproxy sidecar" - "execution container" +pub(crate) async fn container_command( + runtime_args: RuntimeArgs, + exec_params: ExecParams, + watch: drain::Watch, +) -> CliResult { + let mut progress = ProgressTracker::from_env("mirrord container"); + + if runtime_args.command.has_publish() { + progress.warning("mirrord container may have problems with \"-p\" directly container in command, please add to \"contanier.cli_extra_args\" in config if you are planning to publish ports"); + } + + progress.warning("mirrord container is currently an unstable feature"); + + for (name, value) in exec_params.as_env_vars()? { + std::env::set_var(name, value); + } + + std::env::set_var( + MIRRORD_EXECUTION_KIND_ENV, + (CONTAINER_EXECUTION_KIND as u32).to_string(), + ); + + let (mut config, mut context) = LayerConfig::from_env_with_warnings()?; + + // Initialize only error analytics, extproxy will be the full AnalyticsReporter. + let mut analytics = + AnalyticsReporter::only_error(config.telemetry, CONTAINER_EXECUTION_KIND, watch); + + config.verify(&mut context)?; + for warning in context.get_warnings() { + progress.warning(warning); + } + + let (_internal_proxy_tls_guards, _external_proxy_tls_guards) = + prepare_tls_certs_for_container(&mut config)?; + let composed_config_file = create_composed_config(&config)?; std::env::set_var(MIRRORD_CONFIG_FILE_ENV, composed_config_file.path()); @@ -465,3 +478,139 @@ pub(crate) async fn container_command( Ok(status) => Ok(status.code().unwrap_or_default()), } } + +/// Create sidecar and extproxy but return arguments for extension instead of executing run command +pub(crate) async fn container_ext_command( + config_file: Option, + target: Option, + watch: drain::Watch, +) -> CliResult<()> { + let mut progress = ProgressTracker::try_from_env("mirrord preparing to launch") + .unwrap_or_else(|| JsonProgress::new("mirrord preparing to launch").into()); + + let mut env: HashMap = HashMap::new(); + + if let Some(config_file) = config_file.as_ref() { + // Set canoncialized path to config file, in case forks/children processes are in different + // working directories. + let full_path = std::fs::canonicalize(config_file) + .map_err(|e| CliError::CanonicalizeConfigPathFailed(config_file.into(), e))?; + std::env::set_var(MIRRORD_CONFIG_FILE_ENV, full_path.clone()); + env.insert( + MIRRORD_CONFIG_FILE_ENV.into(), + full_path.to_string_lossy().into(), + ); + } + if let Some(target) = target.as_ref() { + std::env::set_var("MIRRORD_IMPERSONATED_TARGET", target.clone()); + env.insert("MIRRORD_IMPERSONATED_TARGET".into(), target.to_string()); + } + let (mut config, mut context) = LayerConfig::from_env_with_warnings()?; + + // Initialize only error analytics, extproxy will be the full AnalyticsReporter. + let mut analytics = AnalyticsReporter::only_error(config.telemetry, Default::default(), watch); + + config.verify(&mut context)?; + for warning in context.get_warnings() { + progress.warning(warning); + } + + let (_internal_proxy_tls_guards, _external_proxy_tls_guards) = + prepare_tls_certs_for_container(&mut config)?; + + let composed_config_file = create_composed_config(&config)?; + std::env::set_var(MIRRORD_CONFIG_FILE_ENV, composed_config_file.path()); + + let mut sub_progress = progress.subtask("preparing to launch process"); + + let execution_info = + MirrordExecution::start_external(&config, &mut sub_progress, &mut analytics).await?; + + let mut connection_info = Vec::new(); + let mut execution_info_env_without_connection_info = Vec::new(); + + for (key, value) in &execution_info.environment { + if key == MIRRORD_CONNECT_TCP_ENV || key == AGENT_CONNECT_INFO_ENV_KEY { + connection_info.push((key.as_str(), value.as_str())); + } else { + execution_info_env_without_connection_info.push((key.as_str(), value.as_str())) + } + } + + sub_progress.success(None); + + let container_runtime = std::env::var("MIRRORD_CONTAINER_USE_RUNTIME") + .ok() + .and_then(|value| ContainerRuntime::from_str(&value, true).ok()) + .unwrap_or(ContainerRuntime::Docker); + + let mut runtime_command = RuntimeCommandBuilder::new(container_runtime); + + if let Ok(console_addr) = std::env::var(MIRRORD_CONSOLE_ADDR_ENV) { + if console_addr + .parse() + .map(|addr: SocketAddr| !addr.ip().is_loopback()) + .unwrap_or_default() + { + runtime_command.add_env(MIRRORD_CONSOLE_ADDR_ENV, console_addr); + } else { + tracing::warn!( + ?console_addr, + "{MIRRORD_CONSOLE_ADDR_ENV} needs to be a non loopback address when used with containers" + ); + } + } + + runtime_command.add_env(MIRRORD_PROGRESS_ENV, "off"); + runtime_command.add_env( + MIRRORD_EXECUTION_KIND_ENV, + (CONTAINER_EXECUTION_KIND as u32).to_string(), + ); + + runtime_command.add_env(MIRRORD_CONFIG_FILE_ENV, "/tmp/mirrord-config.json"); + runtime_command + .add_volume::(composed_config_file.path(), "/tmp/mirrord-config.json"); + + let mut load_env_and_mount_pem = |env: &str, path: &Path| { + let container_path = format!("/tmp/{}.pem", env.to_lowercase()); + + runtime_command.add_env(env, &container_path); + runtime_command.add_volume::(path, container_path); + }; + + if let Some(path) = config.internal_proxy.client_tls_certificate.as_ref() { + load_env_and_mount_pem(MIRRORD_INTPROXY_CLIENT_TLS_CERTIFICATE_ENV, path) + } + + if let Some(path) = config.internal_proxy.client_tls_key.as_ref() { + load_env_and_mount_pem(MIRRORD_INTPROXY_CLIENT_TLS_KEY_ENV, path) + } + + if let Some(path) = config.external_proxy.tls_certificate.as_ref() { + load_env_and_mount_pem(MIRRORD_EXTERNAL_TLS_CERTIFICATE_ENV, path) + } + + if let Some(path) = config.external_proxy.tls_key.as_ref() { + load_env_and_mount_pem(MIRRORD_EXTERNAL_TLS_KEY_ENV, path) + } + + runtime_command.add_envs(execution_info_env_without_connection_info); + + let (sidecar_container_id, sidecar_intproxy_address) = + create_sidecar_intproxy(&config, &runtime_command, connection_info).await?; + + runtime_command.add_network(format!("container:{sidecar_container_id}")); + runtime_command.add_volumes_from(sidecar_container_id); + + runtime_command.add_env(LINUX_INJECTION_ENV_VAR, config.container.cli_image_lib_path); + runtime_command.add_env( + MIRRORD_CONNECT_TCP_ENV, + sidecar_intproxy_address.to_string(), + ); + + let output = serde_json::to_string(&runtime_command.into_command_extension_params())?; + progress.success(Some(&output)); + execution_info.wait().await?; + + Ok(()) +} diff --git a/mirrord/cli/src/container/command_builder.rs b/mirrord/cli/src/container/command_builder.rs index e4cd6d843af..20e0fac883b 100644 --- a/mirrord/cli/src/container/command_builder.rs +++ b/mirrord/cli/src/container/command_builder.rs @@ -1,12 +1,12 @@ use std::{ffi::OsStr, path::Path}; -use crate::config::{ContainerCommand, ContainerRuntime}; +use crate::config::{ContainerRuntime, ContainerRuntimeCommand}; #[derive(Debug, Clone)] pub struct Empty; #[derive(Debug, Clone)] pub struct WithCommand { - command: ContainerCommand, + command: ContainerRuntimeCommand, } #[derive(Debug, Clone)] @@ -113,7 +113,7 @@ impl RuntimeCommandBuilder { pub(super) fn with_command( self, - command: ContainerCommand, + command: ContainerRuntimeCommand, ) -> RuntimeCommandBuilder { let RuntimeCommandBuilder { runtime, @@ -127,6 +127,20 @@ impl RuntimeCommandBuilder { extra_args, } } + + /// Convert to serialzable result for mirrord extensions + pub fn into_command_extension_params(self) -> ExtensionRuntimeCommand { + let RuntimeCommandBuilder { + runtime, + extra_args, + .. + } = self; + + ExtensionRuntimeCommand { + runtime, + extra_args, + } + } } impl RuntimeCommandBuilder { @@ -139,7 +153,7 @@ impl RuntimeCommandBuilder { } = self; let (runtime_command, runtime_args) = match step.command { - ContainerCommand::Run { runtime_args } => ("run".to_owned(), runtime_args), + ContainerRuntimeCommand::Run { runtime_args } => ("run".to_owned(), runtime_args), }; ( @@ -150,3 +164,14 @@ impl RuntimeCommandBuilder { ) } } + +/// Information for mirrord extensions to use mirrord container feature but injecting the connection +/// info into the container manualy by the extension +#[derive(Debug, serde::Serialize)] +pub struct ExtensionRuntimeCommand { + /// Container runtime to use (should be defaulted to docker) + runtime: ContainerRuntime, + + /// Run command args that the extension should add to container command + extra_args: Vec, +} diff --git a/mirrord/cli/src/internal_proxy.rs b/mirrord/cli/src/internal_proxy.rs index 2a1a6f3e829..1c8763a0d92 100644 --- a/mirrord/cli/src/internal_proxy.rs +++ b/mirrord/cli/src/internal_proxy.rs @@ -141,13 +141,17 @@ pub(crate) async fn proxy( let first_connection_timeout = Duration::from_secs(config.internal_proxy.start_idle_timeout); let consecutive_connection_timeout = Duration::from_secs(config.internal_proxy.idle_timeout); - IntProxy::new_with_connection(agent_conn, listener) - .run(first_connection_timeout, consecutive_connection_timeout) - .await - .map_err(InternalProxyError::from) - .inspect_err(|error| { - tracing::error!(%error, "Internal proxy encountered an error, exiting"); - }) + IntProxy::new_with_connection( + agent_conn, + listener, + config.experimental.readonly_file_buffer, + ) + .run(first_connection_timeout, consecutive_connection_timeout) + .await + .map_err(InternalProxyError::from) + .inspect_err(|error| { + tracing::error!(%error, "Internal proxy encountered an error, exiting"); + }) } /// Creates a connection with the agent and handles one round of ping pong. diff --git a/mirrord/cli/src/main.rs b/mirrord/cli/src/main.rs index bd9dc386714..9213c1cc57a 100644 --- a/mirrord/cli/src/main.rs +++ b/mirrord/cli/src/main.rs @@ -11,7 +11,7 @@ use clap::{CommandFactory, Parser}; use clap_complete::generate; use config::*; use connection::create_and_connect; -use container::container_command; +use container::{container_command, container_ext_command}; use diagnose::diagnose_command; use execution::MirrordExecution; use extension::extension_exec; @@ -689,12 +689,16 @@ fn main() -> miette::Result<()> { Commands::Diagnose(args) => diagnose_command(*args).await?, Commands::Container(args) => { let (runtime_args, exec_params) = args.into_parts(); + let exit_code = container_command(runtime_args, exec_params, watch).await?; if exit_code != 0 { std::process::exit(exit_code); } } + Commands::ExtensionContainer(args) => { + container_ext_command(args.config_file, args.target, watch).await? + } Commands::ExternalProxy { port } => external_proxy::proxy(port, watch).await?, Commands::PortForward(args) => port_forward(&args, watch).await?, Commands::Vpn(args) => vpn::vpn_command(*args).await?, diff --git a/mirrord/config/configuration.md b/mirrord/config/configuration.md index fb7416c1c3b..df0788ada78 100644 --- a/mirrord/config/configuration.md +++ b/mirrord/config/configuration.md @@ -77,6 +77,9 @@ configuration file containing all fields. "override": { "DATABASE_CONNECTION": "db://localhost:7777/my-db", "LOCAL_BEAR": "panda" + }, + "mapping": { + ".+_TIMEOUT": "1000" } }, "fs": { @@ -433,6 +436,10 @@ Path of the mirrord-layer lib inside the specified mirrord-cli image. Defaults to `"/opt/mirrord/lib/libmirrord_layer.so"`. +### container.cli_prevent_cleanup {#container-cli_extra_args} + +Don't add `--rm` to sidecar command to prevent cleanup. + ## experimental {#root-experimental} mirrord Experimental features. @@ -460,6 +467,16 @@ Enables `getifaddrs` hook that removes IPv6 interfaces from the list returned by DEPRECATED, WILL BE REMOVED +### _experimental_ readonly_file_buffer {#experimental-readonly_file_buffer} + +Sets buffer size for readonly remote files (in bytes, for example 4096). +If set, such files will be read in chunks and buffered locally. +This improves performace when the user application reads data in small portions. + +Setting to 0 disables file buffering. + + + ### _experimental_ tcp_ping4_mock {#experimental-tcp_ping4_mock} @@ -649,6 +666,9 @@ See the environment variables [reference](https://mirrord.dev/docs/reference/env "override": { "DATABASE_CONNECTION": "db://localhost:7777/my-db", "LOCAL_BEAR": "panda" + }, + "mapping": { + ".+_TIMEOUT": "1000" } } } @@ -692,6 +712,27 @@ If set, the variables are fetched after the user application is started. This setting is meant to resolve issues when using mirrord via the IntelliJ plugin on WSL and the remote environment contains a lot of variables. +### feature.env.mapping {#feature-env-mapping} + +Specify map of patterns that if matched will replace the value according to specification. + +*Capture groups are allowed.* + +Example: +```json +{ + ".+_TIMEOUT": "10000" + "LOG_.+_VERBOSITY": "debug" + "(\w+)_(\d+)": "magic-value" +} +``` + +Will do the next replacements for environment variables that match: + +`CONNECTION_TIMEOUT: 500` => `CONNECTION_TIMEOUT: 10000` +`LOG_FILE_VERBOSITY: info` => `LOG_FILE_VERBOSITY: debug` +`DATA_1234: common-value` => `DATA_1234: magic-value` + ### feature.env.override {#feature-env-override} Allows setting or overriding environment variables (locally) with a custom value. diff --git a/mirrord/config/src/container.rs b/mirrord/config/src/container.rs index f118350c4a6..5f9e16f7447 100644 --- a/mirrord/config/src/container.rs +++ b/mirrord/config/src/container.rs @@ -40,6 +40,12 @@ pub struct ContainerConfig { #[config(default)] pub cli_extra_args: Vec, + /// ### container.cli_prevent_cleanup {#container-cli_extra_args} + /// + /// Don't add `--rm` to sidecar command to prevent cleanup. + #[config(default)] + pub cli_prevent_cleanup: bool, + /// ### container.cli_image_lib_path {#container-cli_image} /// /// Path of the mirrord-layer lib inside the specified mirrord-cli image. diff --git a/mirrord/config/src/experimental.rs b/mirrord/config/src/experimental.rs index 04fdf423527..1fd9213a762 100644 --- a/mirrord/config/src/experimental.rs +++ b/mirrord/config/src/experimental.rs @@ -58,6 +58,18 @@ pub struct ExperimentalConfig { /// Uses /dev/null for creating local fake files (should be better than using /tmp) #[config(default = true)] pub use_dev_null: bool, + + /// ### _experimental_ readonly_file_buffer {#experimental-readonly_file_buffer} + /// + /// Sets buffer size for readonly remote files (in bytes, for example 4096). + /// If set, such files will be read in chunks and buffered locally. + /// This improves performace when the user application reads data in small portions. + /// + /// Setting to 0 disables file buffering. + /// + /// + #[config(default = 0)] + pub readonly_file_buffer: u64, } impl CollectAnalytics for &ExperimentalConfig { @@ -68,5 +80,6 @@ impl CollectAnalytics for &ExperimentalConfig { analytics.add("enable_exec_hooks_linux", self.enable_exec_hooks_linux); analytics.add("hide_ipv6_interfaces", self.hide_ipv6_interfaces); analytics.add("disable_reuseaddr", self.disable_reuseaddr); + analytics.add("readonly_file_buffer", self.readonly_file_buffer); } } diff --git a/mirrord/intproxy/Cargo.toml b/mirrord/intproxy/Cargo.toml index 85334cb83bd..1c9cf9ecc75 100644 --- a/mirrord/intproxy/Cargo.toml +++ b/mirrord/intproxy/Cargo.toml @@ -47,3 +47,4 @@ exponential-backoff = "2" [dev-dependencies] reqwest.workspace = true +rstest.workspace = true diff --git a/mirrord/intproxy/protocol/src/lib.rs b/mirrord/intproxy/protocol/src/lib.rs index 5c5a003260c..623020718b6 100644 --- a/mirrord/intproxy/protocol/src/lib.rs +++ b/mirrord/intproxy/protocol/src/lib.rs @@ -35,7 +35,7 @@ pub struct LocalMessage { } /// Messages sent by the layer and handled by the internal proxy. -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub enum LayerToProxyMessage { /// A request to start new `layer <-> proxy` session. /// This should be the first message sent by the layer after opening a new connection to the @@ -54,7 +54,7 @@ pub enum LayerToProxyMessage { } /// Layer process information -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub struct ProcessInfo { /// Process ID. pub pid: u32, @@ -82,7 +82,7 @@ pub struct LayerId(pub u64); /// Sharing state between [`exec`](https://man7.org/linux/man-pages/man3/exec.3.html) calls is currently not supported. /// Therefore, when the layer initializes, it uses [`NewSessionRequest::New`] and does not inherit /// any state. -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub enum NewSessionRequest { /// Layer initialized from its constructor, has a fresh state. New(ProcessInfo), @@ -119,7 +119,7 @@ impl fmt::Display for NetProtocol { } /// A request to initiate a new outgoing connection. -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub struct OutgoingConnectRequest { /// The address the user application tries to connect to. pub remote_address: SocketAddress, @@ -128,7 +128,7 @@ pub struct OutgoingConnectRequest { } /// Requests related to incoming connections. -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub enum IncomingRequest { /// A request made by layer when it starts listening for mirrored connections. PortSubscribe(PortSubscribe), @@ -152,7 +152,7 @@ pub struct ConnMetadataRequest { /// A response to layer's [`ConnMetadataRequest`]. /// Contains metadata useful for hooking `getsockname` and `getpeername`. -#[derive(Encode, Decode, Debug, Clone)] +#[derive(Encode, Decode, Debug, Clone, PartialEq, Eq)] pub struct ConnMetadataResponse { /// Original source of data, provided by the agent. Meant to be exposed to the user instead of /// the real source, which will always be localhost. @@ -176,7 +176,7 @@ pub struct ConnMetadataResponse { /// /// For each connection incoming to the remote port, /// the internal proxy will initiate a new connection to the local port specified in `listening_on`. -#[derive(Encode, Decode, Debug, Clone)] +#[derive(Encode, Decode, Debug, Clone, PartialEq, Eq)] pub struct PortSubscribe { /// Local address on which the layer is listening. pub listening_on: SocketAddr, @@ -185,7 +185,7 @@ pub struct PortSubscribe { } /// Instructions for the internal proxy and the agent on how to execute port mirroring. -#[derive(Encode, Decode, Debug, Clone)] +#[derive(Encode, Decode, Debug, Clone, PartialEq, Eq)] pub enum PortSubscription { /// Wrapped [`StealType`] specifies how to execute port mirroring. Steal(StealType), @@ -194,7 +194,7 @@ pub enum PortSubscription { } /// A request to stop proxying incoming connections. -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub struct PortUnsubscribe { /// Port on the remote pod that layer mirrored. pub port: Port, @@ -203,7 +203,7 @@ pub struct PortUnsubscribe { } /// Messages sent by the internal proxy and handled by the layer. -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub enum ProxyToLayerMessage { /// A response to [`NewSessionRequest`]. Contains the identifier of the new `layer <-> proxy` /// session. @@ -221,7 +221,7 @@ pub enum ProxyToLayerMessage { } /// A response to layer's [`IncomingRequest`]. -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub enum IncomingResponse { /// A response to layer's [`PortSubscribe`]. /// As a temporary workaround to [agent protocol](mirrord_protocol) limitations, the only error @@ -234,7 +234,7 @@ pub enum IncomingResponse { } /// A response to layer's [`OutgoingConnectRequest`]. -#[derive(Encode, Decode, Debug)] +#[derive(Encode, Decode, Debug, PartialEq, Eq)] pub struct OutgoingConnectResponse { /// The address the layer should connect to instead of the address requested by the user. pub layer_address: SocketAddress, @@ -310,6 +310,20 @@ impl_request!( res_path = ProxyToLayerMessage::File => FileResponse::ReadLink, ); +impl_request!( + req = MakeDirRequest, + res = RemoteResult<()>, + req_path = LayerToProxyMessage::File => FileRequest::MakeDir, + res_path = ProxyToLayerMessage::File => FileResponse::MakeDir, +); + +impl_request!( + req = MakeDirAtRequest, + res = RemoteResult<()>, + req_path = LayerToProxyMessage::File => FileRequest::MakeDirAt, + res_path = ProxyToLayerMessage::File => FileResponse::MakeDir, +); + impl_request!( req = SeekFileRequest, res = RemoteResult, diff --git a/mirrord/intproxy/src/background_tasks.rs b/mirrord/intproxy/src/background_tasks.rs index 869f860ffda..e43c8f306b8 100644 --- a/mirrord/intproxy/src/background_tasks.rs +++ b/mirrord/intproxy/src/background_tasks.rs @@ -166,6 +166,7 @@ where /// An error that can occur when executing a [`BackgroundTask`]. #[derive(Debug)] +#[cfg_attr(test, derive(PartialEq, Eq))] pub enum TaskError { /// An internal task error. Error(Err), @@ -182,6 +183,16 @@ pub enum TaskUpdate { Finished(Result<(), TaskError>), } +#[cfg(test)] +impl TaskUpdate { + pub fn unwrap_message(self) -> MOut { + match self { + Self::Message(mout) => mout, + Self::Finished(res) => panic!("expected a message, got task result: {res:?}"), + } + } +} + /// A struct that can be used to send messages to a [`BackgroundTask`] registered /// /// A struct that can be used to send messages to a [`BackgroundTask`] registered in the diff --git a/mirrord/intproxy/src/error.rs b/mirrord/intproxy/src/error.rs index 74419e6a035..457b94c17df 100644 --- a/mirrord/intproxy/src/error.rs +++ b/mirrord/intproxy/src/error.rs @@ -8,11 +8,17 @@ use crate::{ agent_conn::{AgentChannelError, AgentConnectionError}, layer_initializer::LayerInitializerError, ping_pong::PingPongError, - proxies::{incoming::IncomingProxyError, outgoing::OutgoingProxyError}, - request_queue::RequestQueueEmpty, + proxies::{ + files::FilesProxyError, incoming::IncomingProxyError, outgoing::OutgoingProxyError, + simple::SimpleProxyError, + }, MainTaskId, }; +#[derive(Error, Debug)] +#[error("agent sent an unexpected message: {0:?}")] +pub struct UnexpectedAgentMessage(pub DaemonMessage); + #[derive(Error, Debug)] pub enum IntProxyError { #[error("waiting for the first layer connection timed out")] @@ -26,8 +32,8 @@ pub enum IntProxyError { AgentConnection(#[from] AgentConnectionError), #[error("agent closed connection with error: {0}")] AgentFailed(String), - #[error("agent sent unexpected message: {0:?}")] - UnexpectedAgentMessage(DaemonMessage), + #[error(transparent)] + UnexpectedAgentMessage(#[from] UnexpectedAgentMessage), #[error("background task {0} exited unexpectedly")] TaskExit(MainTaskId), @@ -43,11 +49,13 @@ pub enum IntProxyError { #[error("layer connection failed: {0}")] LayerConnection(#[from] CodecError), #[error("simple proxy failed: {0}")] - SimpleProxy(#[from] RequestQueueEmpty), + SimpleProxy(#[from] SimpleProxyError), #[error("outgoing proxy failed: {0}")] OutgoingProxy(#[from] OutgoingProxyError), #[error("incoming proxy failed: {0}")] IncomingProxy(#[from] IncomingProxyError), + #[error("files proxy failed: {0}")] + FilesProxy(#[from] FilesProxyError), } pub type Result = core::result::Result; diff --git a/mirrord/intproxy/src/lib.rs b/mirrord/intproxy/src/lib.rs index 794016d78a9..d9f85f64f55 100644 --- a/mirrord/intproxy/src/lib.rs +++ b/mirrord/intproxy/src/lib.rs @@ -4,6 +4,7 @@ use std::{collections::HashMap, time::Duration}; use background_tasks::{BackgroundTasks, TaskSender, TaskUpdate}; +use error::UnexpectedAgentMessage; use layer_conn::LayerConnection; use layer_initializer::LayerInitializer; use main_tasks::{FromLayer, LayerForked, MainTaskId, ProxyMessage, ToLayer}; @@ -11,6 +12,7 @@ use mirrord_intproxy_protocol::{LayerId, LayerToProxyMessage, LocalMessage}; use mirrord_protocol::{ClientMessage, DaemonMessage, LogLevel, CLIENT_READY_FOR_LOGS}; use ping_pong::{AgentSentPong, PingPong}; use proxies::{ + files::{FilesProxy, FilesProxyMessage}, incoming::{IncomingProxy, IncomingProxyMessage}, outgoing::{OutgoingProxy, OutgoingProxyMessage}, simple::{SimpleProxy, SimpleProxyMessage}, @@ -43,6 +45,7 @@ struct TaskTxs { outgoing: TaskSender, incoming: TaskSender, ping_pong: TaskSender, + files: TaskSender, } /// This struct contains logic for proxying between multiple layer instances and one agent. @@ -65,7 +68,11 @@ impl IntProxy { /// Creates a new [`IntProxy`] using existing [`AgentConnection`]. /// The returned instance will accept connections from the layers using the given /// [`TcpListener`]. - pub fn new_with_connection(agent_conn: AgentConnection, listener: TcpListener) -> Self { + pub fn new_with_connection( + agent_conn: AgentConnection, + listener: TcpListener, + file_buffer_size: u64, + ) -> Self { let mut background_tasks: BackgroundTasks = Default::default(); @@ -96,6 +103,11 @@ impl IntProxy { MainTaskId::IncomingProxy, Self::CHANNEL_SIZE, ); + let files = background_tasks.register( + FilesProxy::new(file_buffer_size), + MainTaskId::FilesProxy, + Self::CHANNEL_SIZE, + ); Self { any_connection_accepted: false, @@ -108,6 +120,7 @@ impl IntProxy { outgoing, incoming, ping_pong, + files, }, } } @@ -179,8 +192,8 @@ impl IntProxy { }; self.task_txs - .simple - .send(SimpleProxyMessage::LayerForked(msg)) + .files + .send(FilesProxyMessage::LayerForked(msg)) .await; self.task_txs .incoming @@ -224,8 +237,8 @@ impl IntProxy { let msg = LayerClosed { id: LayerId(id) }; self.task_txs - .simple - .send(SimpleProxyMessage::LayerClosed(msg)) + .files + .send(FilesProxyMessage::LayerClosed(msg)) .await; self.task_txs .incoming @@ -275,8 +288,8 @@ impl IntProxy { } DaemonMessage::File(msg) => { self.task_txs - .simple - .send(SimpleProxyMessage::FileRes(msg)) + .files + .send(FilesProxyMessage::FileRes(msg)) .await } DaemonMessage::GetAddrInfoResponse(msg) => { @@ -303,10 +316,8 @@ impl IntProxy { } self.task_txs - .simple - .send(SimpleProxyMessage::ProtocolVersion( - protocol_version.clone(), - )) + .files + .send(FilesProxyMessage::ProtocolVersion(protocol_version.clone())) .await; self.task_txs @@ -325,7 +336,9 @@ impl IntProxy { .await } other => { - return Err(IntProxyError::UnexpectedAgentMessage(other)); + return Err(IntProxyError::UnexpectedAgentMessage( + UnexpectedAgentMessage(other), + )); } } @@ -344,8 +357,8 @@ impl IntProxy { match message { LayerToProxyMessage::File(req) => { self.task_txs - .simple - .send(SimpleProxyMessage::FileReq(message_id, layer_id, req)) + .files + .send(FilesProxyMessage::FileReq(message_id, layer_id, req)) .await; } LayerToProxyMessage::GetAddrInfo(req) => { diff --git a/mirrord/intproxy/src/main_tasks.rs b/mirrord/intproxy/src/main_tasks.rs index a6ad4a54b77..fd0e5d7e8e5 100644 --- a/mirrord/intproxy/src/main_tasks.rs +++ b/mirrord/intproxy/src/main_tasks.rs @@ -7,6 +7,7 @@ use tokio::net::TcpStream; /// Messages sent back to the [`IntProxy`](crate::IntProxy) from the main background tasks. See /// [`MainTaskId`]. #[derive(Debug)] +#[cfg_attr(test, derive(PartialEq, Eq))] pub enum ProxyMessage { /// Message to be sent to the agent. ToAgent(ClientMessage), @@ -20,7 +21,18 @@ pub enum ProxyMessage { NewLayer(NewLayer), } +#[cfg(test)] +impl ProxyMessage { + pub fn unwrap_proxy_to_layer_message(self) -> ProxyToLayerMessage { + match self { + Self::ToLayer(to_layer) => to_layer.message, + other => panic!("expected proxy to layer message, found {other:?}"), + } + } +} + #[derive(Debug)] +#[cfg_attr(test, derive(PartialEq, Eq))] pub struct ToLayer { pub message_id: MessageId, pub layer_id: LayerId, @@ -28,6 +40,7 @@ pub struct ToLayer { } #[derive(Debug)] +#[cfg_attr(test, derive(PartialEq, Eq))] pub struct FromLayer { pub message_id: MessageId, pub layer_id: LayerId, @@ -42,6 +55,16 @@ pub struct NewLayer { pub parent_id: Option, } +#[cfg(test)] +impl PartialEq for NewLayer { + fn eq(&self, other: &Self) -> bool { + self.id == other.id && self.parent_id == other.parent_id + } +} + +#[cfg(test)] +impl Eq for NewLayer {} + impl From for ProxyMessage { fn from(value: ClientMessage) -> Self { Self::ToAgent(value) @@ -82,6 +105,7 @@ pub enum MainTaskId { IncomingProxy, PingPong, AgentConnection, + FilesProxy, LayerConnection(LayerId), } @@ -95,6 +119,7 @@ impl fmt::Display for MainTaskId { Self::AgentConnection => f.write_str("AGENT_CONNECTION"), Self::LayerConnection(id) => write!(f, "LAYER_CONNECTION {}", id.0), Self::IncomingProxy => f.write_str("INCOMING_PROXY"), + Self::FilesProxy => f.write_str("FILES_PROXY"), } } } diff --git a/mirrord/intproxy/src/proxies.rs b/mirrord/intproxy/src/proxies.rs index 94fedb589c6..47f8f274e9e 100644 --- a/mirrord/intproxy/src/proxies.rs +++ b/mirrord/intproxy/src/proxies.rs @@ -1,6 +1,7 @@ //! Sub-proxies of the internal proxy. Each of these encapsulates logic for handling a group of //! related requests and exchanges messages only with the [`IntProxy`](crate::IntProxy). +pub mod files; pub mod incoming; pub mod outgoing; pub mod simple; diff --git a/mirrord/intproxy/src/proxies/files.rs b/mirrord/intproxy/src/proxies/files.rs new file mode 100644 index 00000000000..79ac6695575 --- /dev/null +++ b/mirrord/intproxy/src/proxies/files.rs @@ -0,0 +1,1373 @@ +use core::fmt; +use std::{collections::HashMap, vec}; + +use mirrord_intproxy_protocol::{LayerId, MessageId, ProxyToLayerMessage}; +use mirrord_protocol::{ + file::{ + CloseDirRequest, CloseFileRequest, DirEntryInternal, ReadDirBatchRequest, ReadDirResponse, + ReadFileResponse, ReadLimitedFileRequest, SeekFromInternal, MKDIR_VERSION, + READDIR_BATCH_VERSION, READLINK_VERSION, + }, + ClientMessage, DaemonMessage, ErrorKindInternal, FileRequest, FileResponse, RemoteIOError, + ResponseError, +}; +use semver::Version; +use thiserror::Error; +use tracing::Level; + +use crate::{ + background_tasks::{BackgroundTask, MessageBus}, + error::UnexpectedAgentMessage, + main_tasks::{LayerClosed, LayerForked, ProxyMessage, ToLayer}, + remote_resources::RemoteResources, + request_queue::RequestQueue, +}; + +/// Messages handled by [`FilesProxy`]. +#[derive(Debug)] +pub enum FilesProxyMessage { + /// Layer sent file request. + FileReq(MessageId, LayerId, FileRequest), + /// Agent sent file response. + FileRes(FileResponse), + /// Protocol version was negotiated with the agent. + ProtocolVersion(Version), + /// Layer instance forked. + LayerForked(LayerForked), + /// Layer instance closed. + LayerClosed(LayerClosed), +} + +/// Error that can occur in [`FilesProxy`]. +#[derive(Error, Debug)] +#[error(transparent)] +pub struct FilesProxyError(#[from] UnexpectedAgentMessage); + +/// Locally cached data of a remote file that is buffered. +#[derive(Default)] +struct BufferedFileData { + /// Buffered file contents. + buffer: Vec, + /// Position of [`Self::buffer`] in the file. + buffer_position: u64, + /// Position of the file descriptor in the file. + /// This position is normally managed in the agent, + /// but for buffered files we manage it here. + /// It's simpler this way. + fd_position: u64, +} + +impl BufferedFileData { + /// Attempts to read `amount` bytes from [`Self::buffer`], starting from `position` in the file. + /// + /// Returns [`None`] when the read does not fit in the buffer in whole. + fn read_from_buffer(&self, amount: u64, position: u64) -> Option<&[u8]> { + let start_from = position.checked_sub(self.buffer_position)? as usize; + let end_before = start_from + amount as usize; + self.buffer.get(start_from..end_before) + } +} + +impl fmt::Debug for BufferedFileData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("BufferedFileData") + .field("buffer_position", &self.buffer_position) + .field("buffer_len", &self.buffer.len()) + .field("fd_position", &self.fd_position) + .finish() + } +} + +/// Locally cached data of a remote directory that is buffered. +#[derive(Default)] +struct BufferedDirData { + /// Buffered entries of this directory. + buffered_entries: vec::IntoIter, +} + +impl fmt::Debug for BufferedDirData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("BufferedDirData") + .field("remaining_buffered_entries", &self.buffered_entries.len()) + .finish() + } +} + +/// Additional request data that is saved by [`FilesProxy`] in its [`RequestQueue`]. +/// Allows for handling buffered reads by marking requests that should be handled in a special way. +#[derive(Debug, Default)] +enum AdditionalRequestData { + /// Open file that will be buffered. + OpenBuffered, + + /// Read file that is buffered. + ReadBuffered { + /// File descriptor. + fd: u64, + /// Read buffer size of the user application. + /// The user requested reading this many bytes. + requested_amount: u64, + /// Whether we should update fd position in file + /// (we store it locally). + update_fd_position: bool, + }, + + /// Seek file that is buffered. + SeekBuffered { + /// File descriptor. + fd: u64, + }, + + /// All other file ops. + #[default] + Other, +} + +/// For handling all file operations. +/// Run as a [`BackgroundTask`]. +/// +/// # Directory buffering +/// +/// To optimize cases where user application traverses large directories, +/// we use [`FileRequest::ReadDirBatch`] to fetch many entries at once +/// ([`Self::READDIR_BATCH_SIZE`]). +/// +/// Excessive entries are cached locally in this proxy and used until depleted. +/// +/// # File buffering +/// +/// To optimize cases where user application makes a lot of small reads on remote files, +/// we change the way of reading readonly files. +/// +/// 1. When created with [`FilesProxy::new`], this proxy is given a desired file buffer size. Buffer +/// size 0 disables file buffering. +/// 2. When the user requests a read, we fetch at least `buffer_size` bytes. We return the amount +/// requested by the user and store the whole response as a local buffer. +/// 3. When the user requests a read again, we try to fulfill the request using only the local +/// buffer. If it's not possible, we proceed as in point 1 +/// 4. To solve problems with descriptor offset, we only use [`FileRequest::ReadLimited`] to read +/// buffered files. Descriptor offset value is maintained in this proxy. +pub struct FilesProxy { + /// [`mirrord_protocol`] version negotiated with the agent. + /// Determines whether we can use some messages, like [`FileRequest::ReadDirBatch`] or + /// [`FileRequest::ReadLink`]. + protocol_version: Option, + + /// Size for readonly files buffer. + /// If equal to 0, this proxy does not buffer files. + file_buffer_size: u64, + + /// Stores metadata of outstanding requests. + request_queue: RequestQueue, + + /// For tracking remote file descriptors across layer instances (forks). + remote_files: RemoteResources, + /// Locally stored data of buffered files. + buffered_files: HashMap, + + /// For tracking remote directory descriptors across layer instances (forks). + remote_dirs: RemoteResources, + /// Locally stored data of buffered directories. + buffered_dirs: HashMap, +} + +impl fmt::Debug for FilesProxy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("FilesProxy") + .field("file_buffer_size", &self.file_buffer_size) + .field("buffer_readdir", &self.buffer_dirs()) + .field("buffered_files", &self.buffered_files) + .field("buffered_dirs", &self.buffered_dirs) + .field("protocol_version", &self.protocol_version) + .field("request_queue", &self.request_queue) + .finish() + } +} + +impl FilesProxy { + /// How many directory entries we request at a time. + /// Relevant only if [`mirrord_protocol`] version allows for [`FileRequest::ReadDirBatch`]. + pub const READDIR_BATCH_SIZE: usize = 128; + + /// Creates a new files proxy instance. + /// Proxy can be used as a [`BackgroundTask`]. + /// + /// `file_buffer_size` sets size of the readonly files buffer. + /// Size 0 disables buffering. + pub fn new(file_buffer_size: u64) -> Self { + Self { + protocol_version: Default::default(), + file_buffer_size, + + request_queue: Default::default(), + + remote_files: Default::default(), + buffered_files: Default::default(), + + remote_dirs: Default::default(), + buffered_dirs: Default::default(), + } + } + + /// Returns whether [`mirrord_protocol`] version allows for buffering directories. + fn buffer_dirs(&self) -> bool { + self.protocol_version + .as_ref() + .is_some_and(|version| READDIR_BATCH_VERSION.matches(version)) + } + + /// Returns whether this proxy is configured to buffer readonly files. + fn buffer_reads(&self) -> bool { + self.file_buffer_size > 0 + } + + #[tracing::instrument(level = Level::TRACE)] + fn layer_forked(&mut self, forked: LayerForked) { + self.remote_files.clone_all(forked.parent, forked.child); + self.remote_dirs.clone_all(forked.parent, forked.child); + } + + #[tracing::instrument(level = Level::TRACE, skip(message_bus))] + async fn layer_closed(&mut self, closed: LayerClosed, message_bus: &mut MessageBus) { + for fd in self.remote_files.remove_all(closed.id) { + self.buffered_files.remove(&fd); + message_bus + .send(ProxyMessage::ToAgent(ClientMessage::FileRequest( + FileRequest::Close(CloseFileRequest { fd }), + ))) + .await; + } + + for remote_fd in self.remote_dirs.remove_all(closed.id) { + self.buffered_dirs.remove(&remote_fd); + message_bus + .send(ProxyMessage::ToAgent(ClientMessage::FileRequest( + FileRequest::CloseDir(CloseDirRequest { remote_fd }), + ))) + .await; + } + } + + #[tracing::instrument(level = Level::TRACE)] + fn protocol_version(&mut self, version: Version) { + self.protocol_version.replace(version); + } + + // #[tracing::instrument(level = Level::TRACE, skip(message_bus))] + async fn file_request( + &mut self, + request: FileRequest, + layer_id: LayerId, + message_id: MessageId, + message_bus: &mut MessageBus, + ) { + match request { + // Should trigger remote close only when the fd is closed in all layer instances. + FileRequest::Close(close) => { + if self.remote_files.remove(layer_id, close.fd) { + self.buffered_files.remove(&close.fd); + message_bus + .send(ClientMessage::FileRequest(FileRequest::Close(close))) + .await; + } + } + + // Should trigger remote close only when the fd is closed in all layer instances. + FileRequest::CloseDir(close) => { + if self.remote_dirs.remove(layer_id, close.remote_fd) { + self.buffered_dirs.remove(&close.remote_fd); + message_bus + .send(ClientMessage::FileRequest(FileRequest::CloseDir(close))) + .await; + } + } + + // May require storing additional data in the request queue. + FileRequest::Open(open) => { + let additional_data = (self.buffer_reads() && open.open_options.is_read_only()) + .then_some(AdditionalRequestData::OpenBuffered) + .unwrap_or_default(); + self.request_queue + .push_back_with_data(message_id, layer_id, additional_data); + message_bus + .send(ClientMessage::FileRequest(FileRequest::Open(open))) + .await; + } + + // May require storing additional data in the request queue. + FileRequest::OpenRelative(open) => { + let additional_data = (self.buffer_reads() && open.open_options.is_read_only()) + .then_some(AdditionalRequestData::OpenBuffered) + .unwrap_or_default(); + self.request_queue + .push_back_with_data(message_id, layer_id, additional_data); + message_bus + .send(ClientMessage::FileRequest(FileRequest::OpenRelative(open))) + .await; + } + + // Try to use local buffer if possible. + FileRequest::Read(read) => match self.buffered_files.get_mut(&read.remote_fd) { + // File is buffered. + Some(data) => { + let from_buffer = data.read_from_buffer(read.buffer_size, data.fd_position); + if let Some(from_buffer) = from_buffer { + let bytes = from_buffer.to_vec(); + data.fd_position += read.buffer_size; + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::Read(Ok( + ReadFileResponse { + bytes, + read_amount: read.buffer_size, + }, + ))), + }) + .await; + } else { + let additional_data = AdditionalRequestData::ReadBuffered { + fd: read.remote_fd, + requested_amount: read.buffer_size, + update_fd_position: true, + }; + self.request_queue.push_back_with_data( + message_id, + layer_id, + additional_data, + ); + message_bus + .send(ClientMessage::FileRequest(FileRequest::ReadLimited( + ReadLimitedFileRequest { + remote_fd: read.remote_fd, + buffer_size: std::cmp::max( + read.buffer_size, + self.file_buffer_size, + ), + start_from: data.fd_position, + }, + ))) + .await; + } + } + + // File is not buffered. + None => { + self.request_queue.push_back(message_id, layer_id); + message_bus + .send(ClientMessage::FileRequest(FileRequest::Read(read))) + .await; + } + }, + + // Try to use local buffer if possible. + FileRequest::ReadLimited(read) => match self.buffered_files.get_mut(&read.remote_fd) { + // File is buffered. + Some(data) => { + let from_buffer = data.read_from_buffer(read.buffer_size, read.start_from); + if let Some(from_buffer) = from_buffer { + let bytes = from_buffer.to_vec(); + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::ReadLimited(Ok( + ReadFileResponse { + bytes, + read_amount: read.buffer_size, + }, + ))), + }) + .await; + } else { + let additional_data = AdditionalRequestData::ReadBuffered { + fd: read.remote_fd, + requested_amount: read.buffer_size, + update_fd_position: false, + }; + self.request_queue.push_back_with_data( + message_id, + layer_id, + additional_data, + ); + message_bus + .send(ClientMessage::FileRequest(FileRequest::ReadLimited( + ReadLimitedFileRequest { + remote_fd: read.remote_fd, + buffer_size: std::cmp::max( + read.buffer_size, + self.file_buffer_size, + ), + start_from: read.start_from, + }, + ))) + .await; + } + } + + // File is not buffered. + None => { + self.request_queue.push_back(message_id, layer_id); + message_bus + .send(ClientMessage::FileRequest(FileRequest::ReadLimited(read))) + .await; + } + }, + + // Try to use local buffer if possible. + FileRequest::ReadDir(read_dir) => match self.buffered_dirs.get_mut(&read_dir.remote_fd) + { + // Directory is buffered. + Some(data) => { + if let Some(direntry) = data.buffered_entries.next() { + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::ReadDir(Ok( + ReadDirResponse { + direntry: Some(direntry), + }, + ))), + }) + .await; + } else { + self.request_queue.push_back(message_id, layer_id); + message_bus + .send(ClientMessage::FileRequest(FileRequest::ReadDirBatch( + ReadDirBatchRequest { + remote_fd: read_dir.remote_fd, + amount: Self::READDIR_BATCH_SIZE, + }, + ))) + .await; + } + } + + // Directory is not buffered. + None => { + self.request_queue.push_back(message_id, layer_id); + message_bus + .send(ClientMessage::FileRequest(FileRequest::ReadDir(read_dir))) + .await; + } + }, + + // Not supported in old `mirrord-protocol` versions. + req @ FileRequest::ReadLink(..) => { + let supported = self + .protocol_version + .as_ref() + .is_some_and(|version| READLINK_VERSION.matches(version)); + + if supported { + self.request_queue.push_back(message_id, layer_id); + message_bus + .send(ProxyMessage::ToAgent(ClientMessage::FileRequest(req))) + .await; + } else { + message_bus + .send(ToLayer { + message_id, + message: ProxyToLayerMessage::File(FileResponse::ReadLink(Err( + ResponseError::NotImplemented, + ))), + layer_id, + }) + .await; + } + } + + // Should only be sent from intproxy, not from the layer. + FileRequest::ReadDirBatch(..) => { + unreachable!("ReadDirBatch request is never sent from the layer"); + } + + // May require storing additional data in the request queue. + FileRequest::Seek(mut seek) => { + let additional_data = + match (self.buffered_files.get_mut(&seek.fd), &mut seek.seek_from) { + (Some(data), SeekFromInternal::Current(diff)) => { + let result = u64::try_from(data.fd_position as i128 + *diff as i128); + match result { + Ok(offset) => seek.seek_from = SeekFromInternal::Start(offset), + Err(..) => { + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::Seek( + Err(ResponseError::RemoteIO(RemoteIOError { + raw_os_error: Some(22), // EINVAL + kind: ErrorKindInternal::InvalidInput, + })), + )), + }) + .await; + return; + } + } + + AdditionalRequestData::SeekBuffered { fd: seek.fd } + } + (Some(..), _) => AdditionalRequestData::SeekBuffered { fd: seek.fd }, + _ => AdditionalRequestData::Other, + }; + + self.request_queue + .push_back_with_data(message_id, layer_id, additional_data); + message_bus + .send(ClientMessage::FileRequest(FileRequest::Seek(seek))) + .await; + } + + FileRequest::MakeDir(_) | FileRequest::MakeDirAt(_) => { + let supported = self + .protocol_version + .as_ref() + .is_some_and(|version| MKDIR_VERSION.matches(version)); + + if supported { + self.request_queue.push_back(message_id, layer_id); + message_bus + .send(ProxyMessage::ToAgent(ClientMessage::FileRequest(request))) + .await; + } else { + let file_response = FileResponse::MakeDir(Err(ResponseError::NotImplemented)); + + message_bus + .send(ToLayer { + message_id, + message: ProxyToLayerMessage::File(file_response), + layer_id, + }) + .await; + } + } + + // Doesn't require any special logic. + other => { + self.request_queue.push_back(message_id, layer_id); + message_bus.send(ClientMessage::FileRequest(other)).await; + } + } + } + + #[tracing::instrument(level = Level::TRACE, skip(message_bus))] + async fn file_response( + &mut self, + response: FileResponse, + message_bus: &mut MessageBus, + ) -> Result<(), FilesProxyError> { + match response { + // Update file maps. + FileResponse::Open(Ok(open)) => { + let (message_id, layer_id, additional_data) = + self.request_queue.pop_front_with_data().ok_or_else(|| { + UnexpectedAgentMessage(DaemonMessage::File(FileResponse::Open(Ok( + open.clone() + )))) + })?; + + self.remote_files.add(layer_id, open.fd); + + if matches!(additional_data, AdditionalRequestData::OpenBuffered) { + self.buffered_files.insert(open.fd, Default::default()); + } + + message_bus + .send(ToLayer { + layer_id, + message_id, + message: ProxyToLayerMessage::File(FileResponse::Open(Ok(open))), + }) + .await; + } + + // Update dir maps. + FileResponse::OpenDir(Ok(open)) => { + let (message_id, layer_id) = self.request_queue.pop_front().ok_or_else(|| { + UnexpectedAgentMessage(DaemonMessage::File(FileResponse::OpenDir(Ok( + open.clone() + )))) + })?; + + self.remote_dirs.add(layer_id, open.fd); + + if self.buffer_dirs() { + self.buffered_dirs.insert(open.fd, Default::default()); + } + + message_bus + .send(ToLayer { + layer_id, + message_id, + message: ProxyToLayerMessage::File(FileResponse::OpenDir(Ok(open))), + }) + .await; + } + + // If the file is buffered, update `files_data`. + FileResponse::ReadLimited(Ok(read)) => { + let (message_id, layer_id, additional_data) = + self.request_queue.pop_front_with_data().ok_or_else(|| { + UnexpectedAgentMessage(DaemonMessage::File(FileResponse::ReadLimited(Ok( + read.clone(), + )))) + })?; + + let AdditionalRequestData::ReadBuffered { + fd, + requested_amount, + update_fd_position, + } = additional_data + else { + // This file is not buffered. + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::ReadLimited(Ok(read))), + }) + .await; + return Ok(()); + }; + + let Some(data) = self.buffered_files.get_mut(&fd) else { + // File must have been closed from other thread in user application. + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::ReadLimited(Err( + ResponseError::NotFound(fd), + ))), + }) + .await; + return Ok(()); + }; + + let bytes = read + .bytes + .get(..requested_amount as usize) + .unwrap_or(&read.bytes) + .to_vec(); + let read_amount = bytes.len() as u64; + let response = ReadFileResponse { bytes, read_amount }; + + data.buffer = read.bytes; + data.buffer_position = data.fd_position; + let message = if update_fd_position { + // User originally sent `FileRequest::Read`. + data.fd_position += response.read_amount; + FileResponse::Read(Ok(response)) + } else { + // User originally sent `FileRequest::ReadLimited`. + FileResponse::ReadLimited(Ok(response)) + }; + + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(message), + }) + .await; + } + + // If the file is buffered, update `files_data`. + FileResponse::Seek(Ok(seek)) => { + let (message_id, layer_id, additional_data) = + self.request_queue.pop_front_with_data().ok_or_else(|| { + UnexpectedAgentMessage(DaemonMessage::File(FileResponse::Seek(Ok( + seek.clone() + )))) + })?; + + if let AdditionalRequestData::SeekBuffered { fd } = additional_data { + let Some(data) = self.buffered_files.get_mut(&fd) else { + // File must have been closed from other thread in user application. + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::Seek(Err( + ResponseError::NotFound(fd), + ))), + }) + .await; + return Ok(()); + }; + + data.fd_position = seek.result_offset; + } + + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::Seek(Ok(seek))), + }) + .await; + } + + // Store extra entries in `dirs_data`. + FileResponse::ReadDirBatch(Ok(batch)) => { + let (message_id, layer_id) = self.request_queue.pop_front().ok_or_else(|| { + UnexpectedAgentMessage(DaemonMessage::File(FileResponse::ReadDirBatch(Ok( + batch.clone(), + )))) + })?; + + let Some(data) = self.buffered_dirs.get_mut(&batch.fd) else { + // Directory must have been closed from other thread in user application. + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::ReadDir(Err( + ResponseError::NotFound(batch.fd), + ))), + }) + .await; + return Ok(()); + }; + + let mut entries = batch.dir_entries.into_iter(); + let direntry = entries.next(); + data.buffered_entries = entries; + + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(FileResponse::ReadDir(Ok( + ReadDirResponse { direntry }, + ))), + }) + .await; + } + + // Doesn't require any special logic. + other => { + let (message_id, layer_id) = self + .request_queue + .pop_front() + .ok_or_else(|| UnexpectedAgentMessage(DaemonMessage::File(other.clone())))?; + message_bus + .send(ToLayer { + message_id, + layer_id, + message: ProxyToLayerMessage::File(other), + }) + .await; + } + } + + Ok(()) + } +} + +impl BackgroundTask for FilesProxy { + type MessageIn = FilesProxyMessage; + type MessageOut = ProxyMessage; + type Error = FilesProxyError; + + async fn run(mut self, message_bus: &mut MessageBus) -> Result<(), Self::Error> { + while let Some(message) = message_bus.recv().await { + tracing::trace!(?message, "new message in message_bus"); + + match message { + FilesProxyMessage::FileReq(message_id, layer_id, request) => { + self.file_request(request, layer_id, message_id, message_bus) + .await; + } + FilesProxyMessage::FileRes(response) => { + self.file_response(response, message_bus).await?; + } + FilesProxyMessage::LayerClosed(closed) => { + self.layer_closed(closed, message_bus).await; + } + FilesProxyMessage::LayerForked(forked) => self.layer_forked(forked), + FilesProxyMessage::ProtocolVersion(version) => self.protocol_version(version), + } + } + + tracing::trace!("message bus closed, exiting"); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use mirrord_intproxy_protocol::{LayerId, ProxyToLayerMessage}; + use mirrord_protocol::{ + file::{ + FdOpenDirRequest, OpenDirResponse, OpenFileRequest, OpenFileResponse, + OpenOptionsInternal, ReadDirBatchRequest, ReadDirBatchResponse, ReadDirRequest, + ReadDirResponse, ReadFileRequest, ReadFileResponse, ReadLimitedFileRequest, + SeekFileRequest, SeekFileResponse, SeekFromInternal, + }, + ClientMessage, ErrorKindInternal, FileRequest, FileResponse, RemoteIOError, ResponseError, + }; + use rstest::rstest; + use semver::Version; + + use super::{FilesProxy, FilesProxyMessage}; + use crate::{ + background_tasks::{BackgroundTasks, TaskSender, TaskUpdate}, + error::IntProxyError, + main_tasks::{MainTaskId, ProxyMessage, ToLayer}, + }; + + /// Sets up a [`TaskSender`] and [`BackgroundTasks`] for a functioning [`FilesProxy`]. + /// + /// - `protocol_version`: allows specifying the version of the protocol to use for testing out + /// potential mismatches in messages. + /// - `buffer_reads`: configures buffering readonly files + async fn setup_proxy( + protocol_version: Version, + file_buffer_size: u64, + ) -> ( + TaskSender, + BackgroundTasks, + ) { + let mut tasks: BackgroundTasks = + Default::default(); + + let proxy = tasks.register( + FilesProxy::new(file_buffer_size), + MainTaskId::FilesProxy, + 32, + ); + + proxy + .send(FilesProxyMessage::ProtocolVersion(protocol_version)) + .await; + + (proxy, tasks) + } + + /// Convenience for opening a dir. + async fn prepare_dir( + proxy: &TaskSender, + tasks: &mut BackgroundTasks, + ) { + let request = FileRequest::FdOpenDir(FdOpenDirRequest { remote_fd: 0xdad }); + proxy + .send(FilesProxyMessage::FileReq(0xbad, LayerId(0xa55), request)) + .await; + let (_, update) = tasks.next().await.unzip(); + + assert!( + matches!( + update, + Some(TaskUpdate::Message(ProxyMessage::ToAgent( + ClientMessage::FileRequest(FileRequest::FdOpenDir(FdOpenDirRequest { + remote_fd: 0xdad + }),) + ))) + ), + "Mismatched message for `FdOpenDirRequest` {update:?}!" + ); + + let response = FileResponse::OpenDir(Ok(OpenDirResponse { fd: 0xdad })); + proxy.send(FilesProxyMessage::FileRes(response)).await; + let (_, update) = tasks.next().await.unzip(); + + assert!( + matches!( + update, + Some(TaskUpdate::Message(ProxyMessage::ToLayer(ToLayer { + message_id: 0xbad, + layer_id: LayerId(0xa55), + message: ProxyToLayerMessage::File(FileResponse::OpenDir(Ok( + OpenDirResponse { .. } + ))) + }))) + ), + "Mismatched message for `OpenDirResponse` {update:?}!" + ); + } + + #[tokio::test] + async fn old_protocol_uses_read_dir_request() { + let (proxy, mut tasks) = setup_proxy(Version::new(0, 1, 0), 0).await; + + prepare_dir(&proxy, &mut tasks).await; + + let readdir_request = FileRequest::ReadDir(ReadDirRequest { remote_fd: 0xdad }); + proxy + .send(FilesProxyMessage::FileReq( + 0xbad, + LayerId(0xa55), + readdir_request, + )) + .await; + let (_, update) = tasks.next().await.unzip(); + + assert!( + matches!( + update, + Some(TaskUpdate::Message(ProxyMessage::ToAgent( + ClientMessage::FileRequest(FileRequest::ReadDir(ReadDirRequest { .. })) + ))) + ), + "Mismatched message for `ReadDirRequest` {update:?}!" + ); + + let readdir_response = FileResponse::ReadDir(Ok(ReadDirResponse { direntry: None })); + proxy + .send(FilesProxyMessage::FileRes(readdir_response)) + .await; + let (_, update) = tasks.next().await.unzip(); + + assert!( + matches!( + update, + Some(TaskUpdate::Message(ProxyMessage::ToLayer(ToLayer { + message_id: 0xbad, + layer_id: LayerId(0xa55), + message: ProxyToLayerMessage::File(FileResponse::ReadDir(Ok( + ReadDirResponse { .. } + ))) + }))) + ), + "Mismatched message for `ReadDirResponse` {update:?}!" + ); + + drop(proxy); + let results = tasks.results().await; + for (_, result) in results { + assert!(result.is_ok(), "{result:?}"); + } + } + + #[tokio::test] + async fn new_protocol_uses_read_dir_batch_request() { + let (proxy, mut tasks) = setup_proxy(Version::new(1, 9, 0), 0).await; + + prepare_dir(&proxy, &mut tasks).await; + + let request = FileRequest::ReadDir(ReadDirRequest { remote_fd: 0xdad }); + proxy + .send(FilesProxyMessage::FileReq(0xbad, LayerId(0xa55), request)) + .await; + let (_, update) = tasks.next().await.unzip(); + + assert!( + matches!( + update, + Some(TaskUpdate::Message(ProxyMessage::ToAgent( + ClientMessage::FileRequest(FileRequest::ReadDirBatch(ReadDirBatchRequest { + remote_fd: 0xdad, + amount: FilesProxy::READDIR_BATCH_SIZE, + })) + ))) + ), + "Mismatched message for `ReadDirBatchRequest` {update:?}!" + ); + + let response = FileResponse::ReadDirBatch(Ok(ReadDirBatchResponse { + fd: 0xdad, + dir_entries: Vec::new(), + })); + proxy.send(FilesProxyMessage::FileRes(response)).await; + let (_, update) = tasks.next().await.unzip(); + + assert!( + matches!( + update, + Some(TaskUpdate::Message(ProxyMessage::ToLayer(ToLayer { + message_id: 0xbad, + layer_id: LayerId(0xa55), + message: ProxyToLayerMessage::File(FileResponse::ReadDir(Ok( + ReadDirResponse { .. } + ))) + }))) + ), + "Mismatched message for `ReadDirBatchResponse` {update:?}!" + ); + + drop(proxy); + let results = tasks.results().await; + for (_, result) in results { + assert!(result.is_ok(), "{result:?}"); + } + } + + /// Helper function for opening a file in a running [`FilesProxy`]. + async fn open_file( + proxy: &TaskSender, + tasks: &mut BackgroundTasks, + readonly: bool, + ) -> u64 { + let message_id = rand::random(); + let fd = rand::random(); + + let request = FileRequest::Open(OpenFileRequest { + path: PathBuf::from("/some/path"), + open_options: OpenOptionsInternal { + read: true, + write: !readonly, + ..Default::default() + }, + }); + proxy + .send(FilesProxyMessage::FileReq( + message_id, + LayerId(0), + request.clone(), + )) + .await; + let update = tasks.next().await.unwrap().1.unwrap_message(); + assert_eq!( + update, + ProxyMessage::ToAgent(ClientMessage::FileRequest(request)), + ); + + let response = FileResponse::Open(Ok(OpenFileResponse { fd })); + proxy + .send(FilesProxyMessage::FileRes(response.clone())) + .await; + let update = tasks.next().await.unwrap().1.unwrap_message(); + assert_eq!( + update, + ProxyMessage::ToLayer(ToLayer { + message_id, + layer_id: LayerId(0), + message: ProxyToLayerMessage::File(response), + }) + ); + + fd + } + + async fn make_read_request( + proxy: &TaskSender, + tasks: &mut BackgroundTasks, + remote_fd: u64, + buffer_size: u64, + start_from: Option, + ) -> ProxyMessage { + let message_id = rand::random(); + let request = if let Some(start_from) = start_from { + FileRequest::ReadLimited(ReadLimitedFileRequest { + remote_fd, + buffer_size, + start_from, + }) + } else { + FileRequest::Read(ReadFileRequest { + remote_fd, + buffer_size, + }) + }; + + proxy + .send(FilesProxyMessage::FileReq(message_id, LayerId(0), request)) + .await; + tasks.next().await.unwrap().1.unwrap_message() + } + + async fn respond_to_read_request( + proxy: &TaskSender, + tasks: &mut BackgroundTasks, + data: Vec, + limited: bool, + ) -> ProxyMessage { + let response = ReadFileResponse { + read_amount: data.len() as u64, + bytes: data, + }; + let response = if limited { + FileResponse::ReadLimited(Ok(response)) + } else { + FileResponse::Read(Ok(response)) + }; + + proxy.send(FilesProxyMessage::FileRes(response)).await; + tasks.next().await.unwrap().1.unwrap_message() + } + + #[rstest] + #[case(true, false)] + #[case(false, true)] + #[case(false, false)] + #[tokio::test] + async fn reading_from_unbuffered_file(#[case] readonly: bool, #[case] buffering_enabled: bool) { + let (proxy, mut tasks) = setup_proxy( + mirrord_protocol::VERSION.clone(), + buffering_enabled.then_some(4096).unwrap_or_default(), + ) + .await; + + let fd = open_file(&proxy, &mut tasks, readonly).await; + + let update = make_read_request(&proxy, &mut tasks, fd, 10, None).await; + assert_eq!( + update, + ProxyMessage::ToAgent(ClientMessage::FileRequest(FileRequest::Read( + ReadFileRequest { + remote_fd: fd, + buffer_size: 10, + } + ))), + ); + + let update = respond_to_read_request(&proxy, &mut tasks, vec![0; 10], false) + .await + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::Read(Ok(ReadFileResponse { + bytes: vec![0; 10], + read_amount: 10, + }))), + ); + + let update = make_read_request(&proxy, &mut tasks, fd, 1, Some(13)).await; + assert_eq!( + update, + ProxyMessage::ToAgent(ClientMessage::FileRequest(FileRequest::ReadLimited( + ReadLimitedFileRequest { + remote_fd: fd, + buffer_size: 1, + start_from: 13, + } + ))), + ); + + let update = respond_to_read_request(&proxy, &mut tasks, vec![2], true) + .await + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::ReadLimited(Ok(ReadFileResponse { + bytes: vec![2], + read_amount: 1, + }))), + ); + } + + #[tokio::test] + async fn reading_from_buffered_file() { + let (proxy, mut tasks) = setup_proxy(mirrord_protocol::VERSION.clone(), 4096).await; + + let fd = open_file(&proxy, &mut tasks, true).await; + let contents = std::iter::repeat(0_u8..=255).flatten(); + + let update = make_read_request(&proxy, &mut tasks, fd, 1, None).await; + assert_eq!( + update, + ProxyMessage::ToAgent(ClientMessage::FileRequest(FileRequest::ReadLimited( + ReadLimitedFileRequest { + remote_fd: fd, + buffer_size: 4096, + start_from: 0, + } + ))), + ); + + let data = contents.clone().take(4096).collect::>(); + let update = respond_to_read_request(&proxy, &mut tasks, data, true) + .await + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::Read(Ok(ReadFileResponse { + bytes: vec![0], + read_amount: 1, + }))), + ); + + for i in 1..=3 { + let update = make_read_request(&proxy, &mut tasks, fd, 1, None) + .await + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::Read(Ok(ReadFileResponse { + bytes: vec![i], + read_amount: 1, + }))), + ); + } + + let expected = contents.clone().skip(256).take(512).collect::>(); + let update = make_read_request(&proxy, &mut tasks, fd, 512, Some(256)) + .await + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::ReadLimited(Ok(ReadFileResponse { + bytes: expected, + read_amount: 512, + }))), + ); + + let update = make_read_request(&proxy, &mut tasks, fd, 4096 * 2, None).await; + assert_eq!( + update, + ProxyMessage::ToAgent(ClientMessage::FileRequest(FileRequest::ReadLimited( + ReadLimitedFileRequest { + remote_fd: fd, + buffer_size: 4096 * 2, + start_from: 4, + } + ))), + ); + + let data = contents.clone().skip(4).take(4096).collect::>(); + let update = respond_to_read_request(&proxy, &mut tasks, data.clone(), true) + .await + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::Read(Ok(ReadFileResponse { + bytes: data, + read_amount: 4096, + }))), + ); + + let seek_request = FileRequest::Seek(SeekFileRequest { + fd, + seek_from: SeekFromInternal::Start(444), + }); + proxy + .send(FilesProxyMessage::FileReq( + rand::random(), + LayerId(0), + seek_request.clone(), + )) + .await; + let update = tasks.next().await.unwrap().1.unwrap_message(); + assert_eq!( + update, + ProxyMessage::ToAgent(ClientMessage::FileRequest(seek_request)), + ); + let seek_response = FileResponse::Seek(Ok(SeekFileResponse { result_offset: 444 })); + proxy + .send(FilesProxyMessage::FileRes(seek_response.clone())) + .await; + let update = tasks + .next() + .await + .unwrap() + .1 + .unwrap_message() + .unwrap_proxy_to_layer_message(); + assert_eq!(update, ProxyToLayerMessage::File(seek_response),); + + let expected = contents.clone().skip(444).take(10).collect::>(); + let update = make_read_request(&proxy, &mut tasks, fd, 10, None) + .await + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::Read(Ok(ReadFileResponse { + bytes: expected, + read_amount: 10, + }))) + ); + } + + #[tokio::test] + async fn seeking_in_buffered_file() { + let (proxy, mut tasks) = setup_proxy(mirrord_protocol::VERSION.clone(), 4096).await; + + let fd = open_file(&proxy, &mut tasks, true).await; + let contents = std::iter::repeat(0_u8..=255).flatten(); + + let update = make_read_request(&proxy, &mut tasks, fd, 20, None).await; + assert_eq!( + update, + ProxyMessage::ToAgent(ClientMessage::FileRequest(FileRequest::ReadLimited( + ReadLimitedFileRequest { + remote_fd: fd, + buffer_size: 4096, + start_from: 0, + } + ))), + ); + + let data = contents.clone().take(4096).collect::>(); + let expected = contents.take(20).collect::>(); + let update = respond_to_read_request(&proxy, &mut tasks, data, true) + .await + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::Read(Ok(ReadFileResponse { + bytes: expected, + read_amount: 20, + }))), + ); + + let seek_request = FileRequest::Seek(SeekFileRequest { + fd, + seek_from: SeekFromInternal::Current(-30), + }); + proxy + .send(FilesProxyMessage::FileReq( + rand::random(), + LayerId(0), + seek_request.clone(), + )) + .await; + let update = tasks + .next() + .await + .unwrap() + .1 + .unwrap_message() + .unwrap_proxy_to_layer_message(); + assert_eq!( + update, + ProxyToLayerMessage::File(FileResponse::Seek(Err(ResponseError::RemoteIO( + RemoteIOError { + raw_os_error: Some(22), + kind: ErrorKindInternal::InvalidInput, + } + )))) + ); + + let seek_request = FileRequest::Seek(SeekFileRequest { + fd, + seek_from: SeekFromInternal::Current(-10), + }); + proxy + .send(FilesProxyMessage::FileReq( + rand::random(), + LayerId(0), + seek_request.clone(), + )) + .await; + let update = tasks.next().await.unwrap().1.unwrap_message(); + assert_eq!( + update, + ProxyMessage::ToAgent(ClientMessage::FileRequest(FileRequest::Seek( + SeekFileRequest { + fd, + seek_from: SeekFromInternal::Start(10), + } + ))), + ); + let seek_response = FileResponse::Seek(Ok(SeekFileResponse { result_offset: 10 })); + proxy + .send(FilesProxyMessage::FileRes(seek_response.clone())) + .await; + let update = tasks + .next() + .await + .unwrap() + .1 + .unwrap_message() + .unwrap_proxy_to_layer_message(); + assert_eq!(update, ProxyToLayerMessage::File(seek_response),); + } +} diff --git a/mirrord/intproxy/src/proxies/outgoing.rs b/mirrord/intproxy/src/proxies/outgoing.rs index 0f327185a26..d7782053aa9 100644 --- a/mirrord/intproxy/src/proxies/outgoing.rs +++ b/mirrord/intproxy/src/proxies/outgoing.rs @@ -8,7 +8,7 @@ use mirrord_intproxy_protocol::{ }; use mirrord_protocol::{ outgoing::{tcp::DaemonTcpOutgoing, udp::DaemonUdpOutgoing, DaemonConnect, DaemonRead}, - ConnectionId, RemoteResult, ResponseError, + ConnectionId, DaemonMessage, RemoteResult, ResponseError, }; use thiserror::Error; use tracing::Level; @@ -16,9 +16,10 @@ use tracing::Level; use self::interceptor::Interceptor; use crate::{ background_tasks::{BackgroundTask, BackgroundTasks, MessageBus, TaskSender, TaskUpdate}, + error::UnexpectedAgentMessage, main_tasks::ToLayer, proxies::outgoing::net_protocol_ext::NetProtocolExt, - request_queue::{RequestQueue, RequestQueueEmpty}, + request_queue::RequestQueue, ProxyMessage, }; @@ -35,8 +36,8 @@ pub enum OutgoingProxyError { ResponseError(#[from] ResponseError), /// The agent sent a [`DaemonConnect`] response, but the [`RequestQueue`] for layer's connec /// requests was empty. This should never happen. - #[error("failed to match connect response: {0}")] - RequestQueueEmpty(#[from] RequestQueueEmpty), + #[error(transparent)] + UnexpectedAgentMessage(#[from] UnexpectedAgentMessage), /// The proxy failed to prepare a new local socket for the intercepted connection. #[error("failed to prepare local socket: {0}")] SocketSetupError(#[from] io::Error), @@ -142,7 +143,17 @@ impl OutgoingProxy { protocol: NetProtocol, message_bus: &mut MessageBus, ) -> Result<(), OutgoingProxyError> { - let (message_id, layer_id) = self.queue(protocol).get()?; + let (message_id, layer_id) = self.queue(protocol).pop_front().ok_or_else(|| { + let message = match protocol { + NetProtocol::Datagrams => { + DaemonMessage::UdpOutgoing(DaemonUdpOutgoing::Connect(connect.clone())) + } + NetProtocol::Stream => { + DaemonMessage::TcpOutgoing(DaemonTcpOutgoing::Connect(connect.clone())) + } + }; + UnexpectedAgentMessage(message) + })?; let connect = match connect { Ok(connect) => connect, @@ -203,7 +214,8 @@ impl OutgoingProxy { request: OutgoingConnectRequest, message_bus: &mut MessageBus, ) { - self.queue(request.protocol).insert(message_id, session_id); + self.queue(request.protocol) + .push_back(message_id, session_id); let msg = request.protocol.wrap_agent_connect(request.remote_address); message_bus.send(ProxyMessage::ToAgent(msg)).await; diff --git a/mirrord/intproxy/src/proxies/simple.rs b/mirrord/intproxy/src/proxies/simple.rs index f7fe874068c..dae7881247e 100644 --- a/mirrord/intproxy/src/proxies/simple.rs +++ b/mirrord/intproxy/src/proxies/simple.rs @@ -1,335 +1,66 @@ //! The most basic proxying logic. Handles cases when the only job to do in the internal proxy is to //! pass requests and responses between the layer and the agent. -use std::{collections::HashMap, vec::IntoIter}; +use std::collections::HashMap; use mirrord_intproxy_protocol::{LayerId, MessageId, ProxyToLayerMessage}; use mirrord_protocol::{ dns::{GetAddrInfoRequest, GetAddrInfoResponse}, - file::{ - CloseDirRequest, CloseFileRequest, DirEntryInternal, OpenDirResponse, OpenFileResponse, - ReadDirBatchRequest, ReadDirBatchResponse, ReadDirRequest, ReadDirResponse, - READDIR_BATCH_VERSION, - }, - ClientMessage, FileRequest, FileResponse, GetEnvVarsRequest, RemoteResult, ResponseError, + ClientMessage, DaemonMessage, GetEnvVarsRequest, RemoteResult, }; -use semver::Version; use thiserror::Error; use crate::{ background_tasks::{BackgroundTask, MessageBus}, - main_tasks::{LayerClosed, LayerForked, ToLayer}, - remote_resources::RemoteResources, - request_queue::{RequestQueue, RequestQueueEmpty}, + error::UnexpectedAgentMessage, + main_tasks::ToLayer, + request_queue::RequestQueue, ProxyMessage, }; #[derive(Debug)] pub enum SimpleProxyMessage { - FileReq(MessageId, LayerId, FileRequest), - FileRes(FileResponse), AddrInfoReq(MessageId, LayerId, GetAddrInfoRequest), AddrInfoRes(GetAddrInfoResponse), - LayerForked(LayerForked), - LayerClosed(LayerClosed), GetEnvReq(MessageId, LayerId, GetEnvVarsRequest), GetEnvRes(RemoteResult>), - ProtocolVersion(Version), -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -pub(crate) enum RemoteFd { - File(u64), - Dir(u64), -} - -#[derive(Clone)] -pub(crate) enum FileResource { - File, - Dir { - dirs_iter: IntoIter, - }, } #[derive(Error, Debug)] -enum FileError { - #[error("Resource `{0}` not found!")] - MissingResource(u64), - - #[error("Dir operation called on file `{0}`!")] - DirOnFile(u64), -} - -impl From for ResponseError { - fn from(file_fail: FileError) -> Self { - match file_fail { - FileError::MissingResource(remote_fd) => ResponseError::NotFound(remote_fd), - FileError::DirOnFile(remote_fd) => ResponseError::NotDirectory(remote_fd), - } - } -} - -impl FileResource { - fn next_dir(&mut self, remote_fd: u64) -> Result, FileError> { - match self { - FileResource::Dir { dirs_iter } => dirs_iter.next().map(Ok).transpose(), - FileResource::File => Err(FileError::DirOnFile(remote_fd)), - } - } -} +#[error(transparent)] +pub struct SimpleProxyError(#[from] UnexpectedAgentMessage); /// For passing messages between the layer and the agent without custom internal logic. /// Run as a [`BackgroundTask`]. #[derive(Default)] pub struct SimpleProxy { - /// Remote descriptors for open files and directories. Allows tracking across layer forks. - remote_fds: RemoteResources, - /// For [`FileRequest`]s. - file_reqs: RequestQueue, /// For [`GetAddrInfoRequest`]s. addr_info_reqs: RequestQueue, /// For [`GetEnvVarsRequest`]s. get_env_reqs: RequestQueue, } -impl SimpleProxy { - /// `readdir` works by keeping an iterator of all the `dir`s, and a call to it is - /// equivalent to doing `iterator.next()`. - /// - /// For efficiency, whenever we receive a `readdir` request from the layer, we try to - /// read more than just `next()` from the agent, while returning just the next `direntry` - /// back to layer. - async fn handle_readdir( - &mut self, - layer_id: LayerId, - remote_fd: u64, - message_id: u64, - protocol_version: Option<&Version>, - message_bus: &mut MessageBus, - ) -> Result<(), FileError> { - let resource = self - .remote_fds - .get_mut(&layer_id, &RemoteFd::Dir(remote_fd)) - .ok_or(FileError::MissingResource(remote_fd))?; - - if let Some(dir) = resource.next_dir(remote_fd)? { - message_bus - .send(ToLayer { - message_id, - message: ProxyToLayerMessage::File(FileResponse::ReadDir(Ok( - ReadDirResponse { - direntry: Some(dir), - }, - ))), - layer_id, - }) - .await; - } else { - self.file_reqs.insert(message_id, layer_id); - - let request = - if protocol_version.is_some_and(|version| READDIR_BATCH_VERSION.matches(version)) { - FileRequest::ReadDirBatch(ReadDirBatchRequest { - remote_fd, - amount: 128, - }) - } else { - FileRequest::ReadDir(ReadDirRequest { remote_fd }) - }; - - // Convert it into a `ReadDirBatch` for the agent. - message_bus - .send(ProxyMessage::ToAgent(ClientMessage::FileRequest(request))) - .await; - } - - Ok(()) - } -} - impl BackgroundTask for SimpleProxy { - type Error = RequestQueueEmpty; + type Error = SimpleProxyError; type MessageIn = SimpleProxyMessage; type MessageOut = ProxyMessage; - async fn run(mut self, message_bus: &mut MessageBus) -> Result<(), RequestQueueEmpty> { - let mut protocol_version = None; - + async fn run(mut self, message_bus: &mut MessageBus) -> Result<(), Self::Error> { while let Some(msg) = message_bus.recv().await { tracing::trace!(?msg, "new message in message_bus"); match msg { - SimpleProxyMessage::ProtocolVersion(new_protocol_version) => { - protocol_version = Some(new_protocol_version); - } - SimpleProxyMessage::FileReq( - _, - layer_id, - FileRequest::Close(CloseFileRequest { fd }), - ) => { - let do_close = self.remote_fds.remove(layer_id, RemoteFd::File(fd)); - if do_close { - message_bus - .send(ClientMessage::FileRequest(FileRequest::Close( - CloseFileRequest { fd }, - ))) - .await; - } - } - SimpleProxyMessage::FileReq( - _, - layer_id, - FileRequest::CloseDir(CloseDirRequest { remote_fd }), - ) => { - let do_close = self.remote_fds.remove(layer_id, RemoteFd::Dir(remote_fd)); - if do_close { - message_bus - .send(ClientMessage::FileRequest(FileRequest::CloseDir( - CloseDirRequest { remote_fd }, - ))) - .await; - } - } - SimpleProxyMessage::FileReq( - message_id, - layer_id, - FileRequest::ReadDir(ReadDirRequest { remote_fd }), - ) => { - if let Err(fail) = self - .handle_readdir( - layer_id, - remote_fd, - message_id, - protocol_version.as_ref(), - message_bus, - ) - .await - { - // Send local failure to layer. - message_bus - .send(ToLayer { - message_id, - message: ProxyToLayerMessage::File(FileResponse::ReadDir(Err( - fail.into(), - ))), - layer_id, - }) - .await; - } - } - // TODO(alex): We can remove this case when users are up-to-date for `readlink`. - SimpleProxyMessage::FileReq( - message_id, - layer_id, - req @ FileRequest::ReadLink(_), - ) => { - if protocol_version - .as_ref() - .is_some_and(|version| READDIR_BATCH_VERSION.matches(version)) - { - self.file_reqs.insert(message_id, layer_id); - message_bus - .send(ProxyMessage::ToAgent(ClientMessage::FileRequest(req))) - .await; - } else { - message_bus - .send(ToLayer { - message_id, - message: ProxyToLayerMessage::File(FileResponse::ReadLink(Err( - ResponseError::NotImplemented, - ))), - layer_id, - }) - .await; - } - } - SimpleProxyMessage::FileReq(message_id, layer_id, req) => { - self.file_reqs.insert(message_id, layer_id); - message_bus - .send(ProxyMessage::ToAgent(ClientMessage::FileRequest(req))) - .await; - } - SimpleProxyMessage::FileRes(FileResponse::Open(Ok(OpenFileResponse { fd }))) => { - let (message_id, layer_id) = self.file_reqs.get()?; - - self.remote_fds - .add(layer_id, RemoteFd::File(fd), FileResource::File); - - message_bus - .send(ToLayer { - message_id, - message: ProxyToLayerMessage::File(FileResponse::Open(Ok( - OpenFileResponse { fd }, - ))), - layer_id, - }) - .await; - } - SimpleProxyMessage::FileRes(FileResponse::OpenDir(Ok(OpenDirResponse { fd }))) => { - let (message_id, layer_id) = self.file_reqs.get()?; - - self.remote_fds.add( - layer_id, - RemoteFd::Dir(fd), - FileResource::Dir { - dirs_iter: IntoIter::default(), - }, - ); - - message_bus - .send(ToLayer { - message_id, - message: ProxyToLayerMessage::File(FileResponse::OpenDir(Ok( - OpenDirResponse { fd }, - ))), - layer_id, - }) - .await; - } - SimpleProxyMessage::FileRes(FileResponse::ReadDirBatch(Ok( - ReadDirBatchResponse { fd, dir_entries }, - ))) => { - let (message_id, layer_id) = self.file_reqs.get()?; - - let mut entries_iter = dir_entries.into_iter(); - let direntry = entries_iter.next(); - - message_bus - .send(ToLayer { - message_id, - message: ProxyToLayerMessage::File(FileResponse::ReadDir(Ok( - ReadDirResponse { direntry }, - ))), - layer_id, - }) - .await; - - if let Some(FileResource::Dir { dirs_iter }) = - self.remote_fds.get_mut(&layer_id, &RemoteFd::Dir(fd)) - { - *dirs_iter = entries_iter; - } - } - SimpleProxyMessage::FileRes(res) => { - let (message_id, layer_id) = self.file_reqs.get()?; - message_bus - .send(ToLayer { - message_id, - message: ProxyToLayerMessage::File(res), - layer_id, - }) - .await; - } SimpleProxyMessage::AddrInfoReq(message_id, session_id, req) => { - self.addr_info_reqs.insert(message_id, session_id); + self.addr_info_reqs.push_back(message_id, session_id); message_bus - .send(ProxyMessage::ToAgent(ClientMessage::GetAddrInfoRequest( - req, - ))) + .send(ClientMessage::GetAddrInfoRequest(req)) .await; } SimpleProxyMessage::AddrInfoRes(res) => { - let (message_id, layer_id) = self.addr_info_reqs.get()?; + let (message_id, layer_id) = + self.addr_info_reqs.pop_front().ok_or_else(|| { + UnexpectedAgentMessage(DaemonMessage::GetAddrInfoResponse(res.clone())) + })?; message_bus .send(ToLayer { message_id, @@ -338,29 +69,17 @@ impl BackgroundTask for SimpleProxy { }) .await; } - SimpleProxyMessage::LayerClosed(LayerClosed { id }) => { - for to_close in self.remote_fds.remove_all(id) { - let req = match to_close { - RemoteFd::Dir(remote_fd) => { - FileRequest::CloseDir(CloseDirRequest { remote_fd }) - } - RemoteFd::File(fd) => FileRequest::Close(CloseFileRequest { fd }), - }; - - message_bus.send(ClientMessage::FileRequest(req)).await; - } - } - SimpleProxyMessage::LayerForked(LayerForked { child, parent }) => { - self.remote_fds.clone_all(parent, child); - } SimpleProxyMessage::GetEnvReq(message_id, layer_id, req) => { - self.get_env_reqs.insert(message_id, layer_id); + self.get_env_reqs.push_back(message_id, layer_id); message_bus - .send(ProxyMessage::ToAgent(ClientMessage::GetEnvVarsRequest(req))) + .send(ClientMessage::GetEnvVarsRequest(req)) .await; } SimpleProxyMessage::GetEnvRes(res) => { - let (message_id, layer_id) = self.get_env_reqs.get()?; + let (message_id, layer_id) = + self.get_env_reqs.pop_front().ok_or_else(|| { + UnexpectedAgentMessage(DaemonMessage::GetEnvVarsResponse(res.clone())) + })?; message_bus .send(ToLayer { message_id, @@ -373,199 +92,7 @@ impl BackgroundTask for SimpleProxy { } tracing::trace!("message bus closed, exiting"); - Ok(()) - } -} - -#[cfg(test)] -mod tests { - - use mirrord_intproxy_protocol::{LayerId, ProxyToLayerMessage}; - use mirrord_protocol::{ - file::{ - FdOpenDirRequest, OpenDirResponse, ReadDirBatchRequest, ReadDirBatchResponse, - ReadDirRequest, ReadDirResponse, - }, - ClientMessage, FileRequest, FileResponse, - }; - use semver::Version; - - use super::SimpleProxy; - use crate::{ - background_tasks::{BackgroundTasks, TaskSender, TaskUpdate}, - error::IntProxyError, - main_tasks::{MainTaskId, ProxyMessage, ToLayer}, - proxies::simple::SimpleProxyMessage, - }; - - /// Sets up a [`TaskSender`] and [`BackgroundTasks`] for a functioning [`SimpleProxy`]. - /// - /// - `protocol_version`: allows specifying the version of the protocol to use for testing out - /// potential mismatches in messages. - async fn setup_proxy( - protocol_version: Version, - ) -> ( - TaskSender, - BackgroundTasks, - ) { - let mut tasks: BackgroundTasks = - Default::default(); - let proxy = tasks.register(SimpleProxy::default(), MainTaskId::SimpleProxy, 32); - - proxy - .send(SimpleProxyMessage::ProtocolVersion(protocol_version)) - .await; - - (proxy, tasks) - } - - /// Convenience for opening a dir. - async fn prepare_dir( - proxy: &TaskSender, - tasks: &mut BackgroundTasks, - ) { - let request = FileRequest::FdOpenDir(FdOpenDirRequest { remote_fd: 0xdad }); - proxy - .send(SimpleProxyMessage::FileReq(0xbad, LayerId(0xa55), request)) - .await; - let (_, update) = tasks.next().await.unzip(); - - assert!( - matches!( - update, - Some(TaskUpdate::Message(ProxyMessage::ToAgent( - ClientMessage::FileRequest(FileRequest::FdOpenDir(FdOpenDirRequest { .. }),) - ))) - ), - "Mismatched message for `FdOpenDirRequest` {update:?}!" - ); - - let response = FileResponse::OpenDir(Ok(OpenDirResponse { fd: 0xdad })); - proxy.send(SimpleProxyMessage::FileRes(response)).await; - let (_, update) = tasks.next().await.unzip(); - - assert!( - matches!( - update, - Some(TaskUpdate::Message(ProxyMessage::ToLayer(ToLayer { - message_id: 0xbad, - layer_id: LayerId(0xa55), - message: ProxyToLayerMessage::File(FileResponse::OpenDir(Ok( - OpenDirResponse { .. } - ))) - }))) - ), - "Mismatched message for `OpenDirResponse` {update:?}!" - ); - } - - #[tokio::test] - async fn old_protocol_uses_read_dir_request() { - let (proxy, mut tasks) = setup_proxy(Version::new(0, 1, 0)).await; - - prepare_dir(&proxy, &mut tasks).await; - - let readdir_request = FileRequest::ReadDir(ReadDirRequest { remote_fd: 0xdad }); - proxy - .send(SimpleProxyMessage::FileReq( - 0xbad, - LayerId(0xa55), - readdir_request, - )) - .await; - let (_, update) = tasks.next().await.unzip(); - - assert!( - matches!( - update, - Some(TaskUpdate::Message(ProxyMessage::ToAgent( - ClientMessage::FileRequest(FileRequest::ReadDir(ReadDirRequest { .. })) - ))) - ), - "Mismatched message for `ReadDirRequest` {update:?}!" - ); - - let readdir_response = FileResponse::ReadDir(Ok(ReadDirResponse { direntry: None })); - proxy - .send(SimpleProxyMessage::FileRes(readdir_response)) - .await; - let (_, update) = tasks.next().await.unzip(); - - assert!( - matches!( - update, - Some(TaskUpdate::Message(ProxyMessage::ToLayer(ToLayer { - message_id: 0xbad, - layer_id: LayerId(0xa55), - message: ProxyToLayerMessage::File(FileResponse::ReadDir(Ok( - ReadDirResponse { .. } - ))) - }))) - ), - "Mismatched message for `ReadDirResponse` {update:?}!" - ); - - drop(proxy); - let results = tasks.results().await; - for (_, result) in results { - assert!(result.is_ok(), "{result:?}"); - } - } - - #[tokio::test] - async fn new_protocol_uses_read_dir_batch_request() { - let (proxy, mut tasks) = setup_proxy(Version::new(1, 8, 3)).await; - - prepare_dir(&proxy, &mut tasks).await; - - let request = FileRequest::ReadDirBatch(ReadDirBatchRequest { - remote_fd: 0xdad, - amount: 0xca7, - }); - proxy - .send(SimpleProxyMessage::FileReq(0xbad, LayerId(0xa55), request)) - .await; - let (_, update) = tasks.next().await.unzip(); - - assert!( - matches!( - update, - Some(TaskUpdate::Message(ProxyMessage::ToAgent( - ClientMessage::FileRequest(FileRequest::ReadDirBatch(ReadDirBatchRequest { - remote_fd: 0xdad, - amount: 0xca7 - })) - ))) - ), - "Mismatched message for `ReadDirBatchRequest` {update:?}!" - ); - - let response = FileResponse::ReadDirBatch(Ok(ReadDirBatchResponse { - fd: 0xdad, - dir_entries: Vec::new(), - })); - proxy.send(SimpleProxyMessage::FileRes(response)).await; - let (_, update) = tasks.next().await.unzip(); - - assert!( - matches!( - update, - Some(TaskUpdate::Message(ProxyMessage::ToLayer(ToLayer { - message_id: 0xbad, - layer_id: LayerId(0xa55), - message: ProxyToLayerMessage::File(FileResponse::ReadDir(Ok( - ReadDirResponse { .. } - ))) - }))) - ), - "Mismatched message for `ReadDirBatchResponse` {update:?}!" - ); - - drop(proxy); - let results = tasks.results().await; - for (_, result) in results { - assert!(result.is_ok(), "{result:?}"); - } + Ok(()) } } diff --git a/mirrord/intproxy/src/remote_resources.rs b/mirrord/intproxy/src/remote_resources.rs index a195a75c1b8..055455b7818 100644 --- a/mirrord/intproxy/src/remote_resources.rs +++ b/mirrord/intproxy/src/remote_resources.rs @@ -1,21 +1,19 @@ use std::{ - collections::{hash_map::Entry, HashMap}, + collections::{hash_map::Entry, HashMap, HashSet}, hash::Hash, }; use mirrord_intproxy_protocol::LayerId; use tracing::Level; -use crate::proxies::simple::FileResource; - /// For tracking remote resources allocated in the agent: open files and directories, port /// subscriptions. Remote resources can be shared by multiple layer instances because of forks. -pub struct RemoteResources { - by_layer: HashMap>, +pub struct RemoteResources { + by_layer: HashMap>, counts: HashMap, } -impl Default for RemoteResources { +impl Default for RemoteResources { fn default() -> Self { Self { by_layer: Default::default(), @@ -24,10 +22,9 @@ impl Default for RemoteResources { } } -impl RemoteResources +impl RemoteResources where T: Clone + PartialEq + Eq + Hash + core::fmt::Debug, - Resource: Clone, { /// Removes the given resource from the layer instance with the given [`LayerId`]. /// Returns whether the resource should be closed on the agent side. @@ -47,7 +44,7 @@ where Entry::Vacant(..) => return false, }; - if removed.is_none() { + if !removed { return false; } @@ -78,8 +75,8 @@ where return; }; - for resource in resources.keys().cloned() { - *self.counts.entry(resource).or_default() += 1; + for resource in &resources { + *self.counts.entry(resource.clone()).or_default() += 1; } self.by_layer.insert(dst, resources); @@ -94,7 +91,7 @@ where let resources = self.by_layer.remove(&layer_id).unwrap_or_default(); resources - .into_keys() + .into_iter() .filter(|resource| match self.counts.entry(resource.clone()) { Entry::Occupied(e) if *e.get() == 1 => { e.remove(); @@ -109,12 +106,7 @@ where } }) } -} -impl RemoteResources -where - T: Clone + PartialEq + Eq + Hash, -{ /// Adds the given resource to the layer instance with the given [`LayerId`]. /// /// Used when the layer opens a resource, e.g. with @@ -125,45 +117,10 @@ where .by_layer .entry(layer_id) .or_default() - .try_insert(resource.clone(), ()) - .is_ok(); - - if added { - *self.counts.entry(resource).or_default() += 1; - } - } -} - -impl RemoteResources -where - T: Clone + PartialEq + Eq + Hash, -{ - /// Adds the given resource to the layer instance with the given [`LayerId`]. - /// - /// Used when the layer opens a resource, e.g. with - /// [`OpenFileRequest`](mirrord_protocol::file::OpenFileRequest). - #[tracing::instrument(level = Level::TRACE, skip(self, resource, file))] - pub(crate) fn add(&mut self, layer_id: LayerId, resource: T, file: FileResource) { - let added = self - .by_layer - .entry(layer_id) - .or_default() - .try_insert(resource.clone(), file) - .is_ok(); + .insert(resource.clone()); if added { *self.counts.entry(resource).or_default() += 1; } } - - #[tracing::instrument(level = Level::TRACE, skip(self, resource_key))] - pub(crate) fn get_mut( - &mut self, - layer_id: &LayerId, - resource_key: &T, - ) -> Option<&mut FileResource> { - self.by_layer - .get_mut(layer_id) - .and_then(|files| files.get_mut(resource_key)) - } } diff --git a/mirrord/intproxy/src/request_queue.rs b/mirrord/intproxy/src/request_queue.rs index 18a99525e61..e9651884a15 100644 --- a/mirrord/intproxy/src/request_queue.rs +++ b/mirrord/intproxy/src/request_queue.rs @@ -14,44 +14,59 @@ use std::{collections::VecDeque, fmt}; use mirrord_intproxy_protocol::{LayerId, MessageId}; -use thiserror::Error; use tracing::Level; -/// Erorr returned when the proxy attempts to retrieve [`MessageId`] and [`LayerId`] of a request -/// corresponding to a response received from the agent, but the [`RequestQueue`] is empty. This -/// error should never happen. -#[derive(Error, Debug)] -#[error("request queue is empty")] -pub struct RequestQueueEmpty; - /// A queue used to match agent responses with layer requests. /// A single queue can be used for multiple types of requests only if the agent preserves order /// between them. -#[derive(Default)] -pub struct RequestQueue { - inner: VecDeque<(MessageId, LayerId)>, +pub struct RequestQueue { + inner: VecDeque<(MessageId, LayerId, T)>, +} + +impl Default for RequestQueue { + fn default() -> Self { + Self { + inner: Default::default(), + } + } } -impl fmt::Debug for RequestQueue { +impl fmt::Debug for RequestQueue { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RequestQueue") .field("queue_len", &self.inner.len()) - .field("front", &self.inner.front().copied()) - .field("back", &self.inner.back().copied()) + .field("front", &self.inner.front()) + .field("back", &self.inner.back()) .finish() } } -impl RequestQueue { +impl RequestQueue { + /// Save the request at the end of this queue. + #[tracing::instrument(level = Level::TRACE)] + pub fn push_back(&mut self, message_id: MessageId, layer_id: LayerId) { + self.inner + .push_back((message_id, layer_id, Default::default())); + } +} + +impl RequestQueue { + /// Retrieve and remove a request from the front of this queue. + #[tracing::instrument(level = Level::TRACE)] + pub fn pop_front(&mut self) -> Option<(MessageId, LayerId)> { + let (message_id, layer_id, _) = self.inner.pop_front()?; + Some((message_id, layer_id)) + } + /// Save the request at the end of this queue. #[tracing::instrument(level = Level::TRACE)] - pub fn insert(&mut self, message_id: MessageId, layer_id: LayerId) { - self.inner.push_back((message_id, layer_id)); + pub fn push_back_with_data(&mut self, message_id: MessageId, layer_id: LayerId, data: T) { + self.inner.push_back((message_id, layer_id, data)); } /// Retrieve and remove a request from the front of this queue. #[tracing::instrument(level = Level::TRACE)] - pub fn get(&mut self) -> Result<(MessageId, LayerId), RequestQueueEmpty> { - self.inner.pop_front().ok_or(RequestQueueEmpty) + pub fn pop_front_with_data(&mut self) -> Option<(MessageId, LayerId, T)> { + self.inner.pop_front() } } diff --git a/mirrord/layer/Cargo.toml b/mirrord/layer/Cargo.toml index b14e577d4f1..d46147b8482 100644 --- a/mirrord/layer/Cargo.toml +++ b/mirrord/layer/Cargo.toml @@ -19,19 +19,22 @@ workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -mirrord-config = { path = "../config"} -mirrord-protocol = { path = "../protocol"} -mirrord-layer-macro = { path = "./macro"} +mirrord-config = { path = "../config" } +mirrord-protocol = { path = "../protocol" } +mirrord-layer-macro = { path = "./macro" } mirrord-console = { path = "../console" } -mirrord-intproxy-protocol = { path = "../intproxy/protocol", features = ["codec"] } +mirrord-intproxy-protocol = { path = "../intproxy/protocol", features = [ + "codec", +] } ctor = "0.2" libc.workspace = true bincode.workspace = true -nix = { workspace = true, features = ["net", "process", "signal"]} +nix = { workspace = true, features = ["net", "process", "signal"] } tracing.workspace = true tracing-subscriber.workspace = true frida-gum = { version = "0.15", features = ["auto-download"] } +hyper = { workspace = true } serde_json.workspace = true @@ -60,7 +63,6 @@ mirrord-intproxy = { path = "../intproxy" } k8s-openapi.workspace = true chrono = { workspace = true, features = ["clock"] } http-body = { workspace = true } -hyper = { workspace = true } rstest.workspace = true tempfile.workspace = true test-cdylib = "*" diff --git a/mirrord/layer/src/debugger_ports.rs b/mirrord/layer/src/debugger_ports.rs index 2b2feb0aed6..f78520fa643 100644 --- a/mirrord/layer/src/debugger_ports.rs +++ b/mirrord/layer/src/debugger_ports.rs @@ -1,10 +1,11 @@ use std::{ - env, + env, fmt, net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}, ops::RangeInclusive, str::FromStr, }; +use hyper::Uri; use tracing::{error, warn}; /// Environment variable used to tell the layer that it should dynamically detect the local @@ -20,10 +21,14 @@ pub const MIRRORD_DETECT_DEBUGGER_PORT_ENV: &str = "MIRRORD_DETECT_DEBUGGER_PORT /// be used by the debugger. Used when injecting the layer through IDE. This setting will be ignored /// if the layer successfully detects the port at runtime, see [`MIRRORD_DETECT_DEBUGGER_PORT_ENV`]. /// -/// Value passed through this variable can represent a single port like '12233' or a range of ports -/// like `12233-13000`. +/// Value passed through this variable can represent a single port like '12233', a range of ports +/// like '12233-13000' or multiple individual ports like '12233,13344,14455' pub const MIRRORD_IGNORE_DEBUGGER_PORTS_ENV: &str = "MIRRORD_IGNORE_DEBUGGER_PORTS"; +/// The default port used by node's --inspect debugger from the +/// [node doceumentation](https://nodejs.org/en/learn/getting-started/debugging#enable-inspector) +pub const NODE_INSPECTOR_DEFAULT_PORT: u16 = 9229; + /// Type of debugger which is used to run the user's processes. /// Determines the way we parse the command line arguments the debugger's port. /// Logic of processing the arguments is based on examples taken from the IDEs. @@ -80,6 +85,21 @@ pub enum DebuggerType { /// @/var/folders/2h/fn_s1t8n0cqfc9x71yq845m40000gn/T/cp_dikq30ybalqwcehe333w2xxhd.argfile /// com.example.demo.DemoApplication JavaAgent, + /// Used in node applications, the flags `--inspect`, `--inspect-brk` and `--inspect-wait` + /// invoke the inspector. Invoking them as command line arguments is deprecated, but they are + /// set into the NODE_OPTIONS env var as, for example, `--inspect=9230` + /// + /// the NODE_OPTIONS env var looks like this: + /// "NODE_OPTIONS": "--require=/Path/to/thing --inspect-publish-uid=http + /// --max-old-space-size=9216 --enable-source-maps --inspect=9994" + /// + /// Alternatively, the process may be string on a different port with the NODE_INSPECTOR_INFO + /// env var set, with the port in the "inspectorURL" address + /// + /// the NODE_INSPECTOR_INFO env var looks like this: + /// "NODE_INSPECTOR_INFO" : {"ipcAddress":"/Path/to/thing","pid":"75321",... + /// "inspectorURL":"ws://127.0.0.1:9229/8decd19b-8ea8-45f4-bf72-095ddbdad103"} + NodeInspector, } impl FromStr for DebuggerType { @@ -91,81 +111,162 @@ impl FromStr for DebuggerType { "pydevd" => Ok(Self::PyDevD), "resharper" => Ok(Self::ReSharper), "javaagent" => Ok(Self::JavaAgent), + "nodeinspector" => Ok(Self::NodeInspector), _ => Err(format!("invalid debugger type: {s}")), } } } +impl fmt::Display for DebuggerType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match self { + DebuggerType::DebugPy => "debugpy", + DebuggerType::PyDevD => "pydevd", + DebuggerType::ReSharper => "resharper", + DebuggerType::JavaAgent => "javaagent", + DebuggerType::NodeInspector => "nodeinspector", + } + ) + } +} + impl DebuggerType { /// Retrieves the port used by debugger of this type from the command. - fn get_port(self, args: &[String]) -> Option { - match self { + /// May return multiple ports when using the inspect flags with node. + /// Also returns the value to set into MIRRORD_DETECT_DEBUGGER_PORT_ENV, if any + fn get_ports Option>( + self, + args: &[String], + mut get_env: F, + ) -> (Vec, Option) { + let ports = match self { Self::DebugPy => { - let is_python = args.first()?.rsplit('/').next()?.starts_with("py"); - let runs_debugpy = if args.get(1)?.starts_with("-X") { - args.get(3)?.ends_with("debugpy") // newer args layout + let is_python = args + .first() + .map(String::as_str) + .unwrap_or_default() + .rsplit('/') + .next() + .unwrap_or_default() + .starts_with("py"); + let runs_debugpy = if args.get(1).map(String::as_str).unwrap_or_default().starts_with("-X") { + args.get(3).map(String::as_str).unwrap_or_default().ends_with("debugpy") // newer args layout } else { - args.get(1)?.ends_with("debugpy") // older args layout + args.get(1).map(String::as_str).unwrap_or_default().ends_with("debugpy") // older args layout }; - if !is_python || !runs_debugpy { - None? + if is_python && runs_debugpy { + args.windows(2).find_map(|window| match window { + [opt, val] if opt == "--connect" => val.parse::().ok(), + _ => None, + }) + } else { + None } - - args.windows(2).find_map(|window| match window { - [opt, val] if opt == "--connect" => val.parse::().ok(), - _ => None, - }) + .into_iter() + .collect::>() } Self::PyDevD => { - let is_python = args.first()?.rsplit('/').next()?.starts_with("py"); - let runs_pydevd = args.get(1)?.rsplit('/').next()?.contains("pydevd"); - - if !is_python || !runs_pydevd { - None? + let is_python = args + .first() + .map(String::as_str) + .unwrap_or_default() + .rsplit('/') + .next() + .unwrap_or_default() + .starts_with("py"); + let runs_pydevd = args + .get(1) + .map(String::as_str) + .unwrap_or_default() + .rsplit('/') + .next() + .unwrap_or_default() + .contains("pydevd"); + + if is_python && runs_pydevd { + let client = args.windows(2).find_map(|window| match window { + [opt, val] if opt == "--client" => val.parse::().ok(), + _ => None, + }); + let port = args.windows(2).find_map(|window| match window { + [opt, val] if opt == "--port" => val.parse::().ok(), + _ => None, + }); + + if let (Some(client), Some(port)) = (client, port) { + SocketAddr::new(client, port).into() + } else { + None + } + } else { + None } - - let client = args.windows(2).find_map(|window| match window { - [opt, val] if opt == "--client" => val.parse::().ok(), - _ => None, - })?; - let port = args.windows(2).find_map(|window| match window { - [opt, val] if opt == "--port" => val.parse::().ok(), - _ => None, - })?; - - SocketAddr::new(client, port).into() + .into_iter() + .collect::>() } Self::ReSharper => { - let is_dotnet = args.first()?.ends_with("dotnet"); - let runs_debugger = args.get(2)?.contains("Debugger"); - - if !is_dotnet || !runs_debugger { - None? + let is_dotnet = args.first().map(String::as_str).unwrap_or_default().ends_with("dotnet"); + let runs_debugger = args.get(2).map(String::as_str).unwrap_or_default().contains("Debugger"); + + if is_dotnet && runs_debugger { + args.iter() + .find_map(|arg| arg.strip_prefix("--frontend-port=")) + .and_then(|port| port.parse::().ok()) + .map(|port| SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), port)) + } else { + None } - - args.iter() - .find_map(|arg| arg.strip_prefix("--frontend-port=")) - .and_then(|port| port.parse::().ok()) - .map(|port| SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), port)) + .into_iter() + .collect::>() } Self::JavaAgent => { - let is_java = args.first()?.ends_with("java"); - - if !is_java { - None? + let is_java = args.first().map(String::as_str).unwrap_or_default().ends_with("java"); + + if is_java { + args.iter() + .find_map(|arg| arg.strip_prefix("-agentlib:jdwp=transport=dt_socket")) + .and_then(|agent_lib_args| { + agent_lib_args + .split(',') + .find_map(|arg| arg.strip_prefix("address=")) + }) + .and_then(|full_address| full_address.split(':').last()) + .and_then(|port| port.parse::().ok()) + .map(|port| SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), port)) + } else { + None } - - args.iter() - .find_map(|arg| arg.strip_prefix("-agentlib:jdwp=transport=dt_socket")) - .and_then(|agent_lib_args| agent_lib_args.split(',').find_map(|arg| arg.strip_prefix("address="))) - .and_then(|full_address| full_address.split(':').last()) - .and_then(|port| port.parse::().ok()) - .map(|port| SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), port)) - + .into_iter() + .collect::>() } - } - .and_then(|addr| match addr.ip() { + Self::NodeInspector => { + if let Some(value) = get_env("NODE_OPTIONS") { + // matching specific flags so we avoid matching on, for example, + // `--inspect-publish-uid=http` + value.split_ascii_whitespace() + .filter_map(|flag| match flag.split_once('=') { + Some(("--inspect" | "--inspect-brk" | "--inspect-wait", port)) => port.parse::().ok(), + None if ["--inspect", "--inspect-brk", "--inspect-wait"].contains(&flag) => Some(NODE_INSPECTOR_DEFAULT_PORT), + _ => None, + }) + .map(|port| SocketAddr::new(Ipv4Addr::LOCALHOST.into(), port)) + .collect::>() + } else if let Some(value) = get_env("NODE_INSPECTOR_INFO") { + value.split(',').filter_map(|var| match var.split_once(':')? { + ("inspectorURL", url) => url.parse::().ok()?.port_u16(), + _ => None, + }) + .map(|port| SocketAddr::new(Ipv4Addr::LOCALHOST.into(), port)) + .collect::>() + } else { + vec![] + } + } + }.iter().filter_map(|addr| match addr.ip() { IpAddr::V4(Ipv4Addr::LOCALHOST) | IpAddr::V6(Ipv6Addr::LOCALHOST) => Some(addr.port()), other => { warn!( @@ -175,18 +276,85 @@ impl DebuggerType { None } }) + .collect::>(); + + let next_type = match self { + // node may require several rounds of port ignoring when used in the VSCode plugin + DebuggerType::NodeInspector => Some(DebuggerType::NodeInspector), + _ => None, + }; + (ports, next_type) } } /// Local ports used by the debugger running the process. /// These should be ignored by the layer. -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum DebuggerPorts { - Detected(u16), + Single(u16), FixedRange(RangeInclusive), + Combination(Vec), None, } +impl FromStr for DebuggerPorts { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + // string looks like 'port1,port2,port3-portx,porty-portz' + let mut vec = vec![]; + s.split(',') + .for_each(|s| { + let chunks = s + .split('-') + .map(u16::from_str) + .collect::, _>>() + .inspect_err(|e| error!( + "Failed to decode debugger port range from {} env variable: {}", + MIRRORD_IGNORE_DEBUGGER_PORTS_ENV, + e + )) + .ok().unwrap_or_default(); + match *chunks.as_slice() { + [p] => vec.push(Self::Single(p)), + [p1, p2] if p1 <= p2 => vec.push(Self::FixedRange(p1..=p2)), + _ => { + error!( + "Failed to decode debugger ports from {} env variable: expected a port or range of ports", + MIRRORD_IGNORE_DEBUGGER_PORTS_ENV, + ); + }, + }; + }); + if !vec.is_empty() { + Ok(Self::Combination(vec)) + } else { + Ok(Self::None) + } + } +} + +impl fmt::Display for DebuggerPorts { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match self { + DebuggerPorts::Single(port) => port.to_string(), + DebuggerPorts::FixedRange(range_inclusive) => + format!("{}-{}", range_inclusive.start(), range_inclusive.end()), + DebuggerPorts::Combination(vec) => { + vec.iter() + .map(ToString::to_string) + .collect::>() + .join(",") + } + DebuggerPorts::None => String::default(), + } + ) + } +} + impl DebuggerPorts { /// Create a new instance of this enum based on the environment variables /// ([`MIRRORD_DETECT_DEBUGGER_PORT_ENV`] and [`MIRRORD_IGNORE_DEBUGGER_PORTS_ENV`]) and command @@ -194,7 +362,7 @@ impl DebuggerPorts { /// /// Log errors (like malformed env variables) but do not panic. pub fn from_env() -> Self { - let detected = env::var(MIRRORD_DETECT_DEBUGGER_PORT_ENV) + let (detected, next) = match env::var(MIRRORD_DETECT_DEBUGGER_PORT_ENV) .ok() .and_then(|s| { DebuggerType::from_str(&s) @@ -205,44 +373,38 @@ impl DebuggerPorts { ) }) .ok() - }) - .and_then(|d| d.get_port(&std::env::args().collect::>())); - if let Some(port) = detected { - env::set_var(MIRRORD_IGNORE_DEBUGGER_PORTS_ENV, port.to_string()); - env::remove_var(MIRRORD_DETECT_DEBUGGER_PORT_ENV); - return Self::Detected(port); + }) { + Some(debugger) => debugger.get_ports(&std::env::args().collect::>(), |name| { + std::env::var(name).ok() + }), + None => (vec![], None), + }; + if !detected.is_empty() { + let mut dbg_ports = detected + .iter() + .map(|&port| Some(Self::Single(port))) + .collect::>(); + if let Ok(existing) = env::var(MIRRORD_IGNORE_DEBUGGER_PORTS_ENV) { + dbg_ports.push(DebuggerPorts::from_str(&existing).ok()); + } + let dbg_ports = dbg_ports.into_iter().flatten().collect::>(); + let dbg_port = Self::Combination(dbg_ports); + env::set_var(MIRRORD_IGNORE_DEBUGGER_PORTS_ENV, dbg_port.to_string()); + + if let Some(next_type) = next { + env::set_var(MIRRORD_DETECT_DEBUGGER_PORT_ENV, next_type.to_string()); + } else { + env::remove_var(MIRRORD_DETECT_DEBUGGER_PORT_ENV); + } + return dbg_port; } - let fixed_range = env::var(MIRRORD_IGNORE_DEBUGGER_PORTS_ENV) + // IGNORE_DEBUGGER_PORTS may have a combination of single, multiple or ranges of ports + // separated by a comma they need to be parsed individually + env::var(MIRRORD_IGNORE_DEBUGGER_PORTS_ENV) .ok() - .and_then(|s| { - let chunks = s - .split('-') - .map(u16::from_str) - .collect::, _>>() - .inspect_err(|e| error!( - "Failed to decode debugger ports from {} env variable: {}", - MIRRORD_IGNORE_DEBUGGER_PORTS_ENV, - e - )) - .ok()?; - match *chunks.as_slice() { - [p] => Some(p..=p), - [p1, p2] if p1 <= p2 => Some(p1..=p2), - _ => { - error!( - "Failed to decode debugger ports from {} env variable: expected a port or a range of ports", - MIRRORD_IGNORE_DEBUGGER_PORTS_ENV, - ); - None - }, - } - }); - if let Some(range) = fixed_range { - return Self::FixedRange(range); - } - - Self::None + .and_then(|s| Self::from_str(&s).ok()) + .unwrap_or(Self::None) } /// Return whether the given [SocketAddr] is used by the debugger. @@ -256,8 +418,12 @@ impl DebuggerPorts { } match self { - Self::Detected(port) => *port == addr.port(), + Self::Single(port) => port == &addr.port(), Self::FixedRange(range) => range.contains(&addr.port()), + Self::Combination(vec) => vec + .iter() + .map(|ports| ports.contains(addr)) + .fold(false, |acc, ports| ports || acc), Self::None => false, } } @@ -275,13 +441,16 @@ mod test { let debugger = DebuggerType::DebugPy; assert_eq!( - debugger.get_port( - &command - .split_ascii_whitespace() - .map(ToString::to_string) - .collect::>() - ), - Some(57141), + debugger + .get_ports( + &command + .split_ascii_whitespace() + .map(ToString::to_string) + .collect::>(), + |_| None + ) + .0, + vec![57141], ) } @@ -291,13 +460,16 @@ mod test { let command = "/path/to/python /path/to/pycharm/plugins/pydevd.py --multiprocess --qt-support=auto --client 127.0.0.1 --port 32845 --file /path/to/script.py"; assert_eq!( - debugger.get_port( - &command - .split_ascii_whitespace() - .map(ToString::to_string) - .collect::>() - ), - Some(32845), + debugger + .get_ports( + &command + .split_ascii_whitespace() + .map(ToString::to_string) + .collect::>(), + |_| None + ) + .0, + vec![32845], ) } @@ -307,13 +479,16 @@ mod test { let command = "/path/to/rider/lib/ReSharperHost/linux-x64/dotnet/dotnet exec /path/to/rider/lib/ReSharperHost/JetBrains.Debugger.Worker.exe --mode=client --frontend-port=40905 --plugins=/path/to/rider/plugins/rider-unity/dotnetDebuggerWorker;/path/to/rider/plugins/dpa/DotFiles/JetBrains.DPA.DebugInjector.dll --etw-collect-flags=2 --backend-pid=222222 --handle=333"; assert_eq!( - debugger.get_port( - &command - .split_ascii_whitespace() - .map(ToString::to_string) - .collect::>() - ), - Some(40905) + debugger + .get_ports( + &command + .split_ascii_whitespace() + .map(ToString::to_string) + .collect::>(), + |_| None + ) + .0, + vec![40905] ) } @@ -328,21 +503,52 @@ mod test { let debugger = DebuggerType::JavaAgent; assert_eq!( - debugger.get_port( - &command_line - .split_ascii_whitespace() - .map(ToString::to_string) - .collect::>() - ), - Some(54898) + debugger + .get_ports( + &command_line + .split_ascii_whitespace() + .map(ToString::to_string) + .collect::>(), + |_| None + ) + .0, + vec![54898] + ) + } + + #[rstest] + #[case(("NODE_OPTIONS", Some("--require=/path --inspect-publish-uid=http --inspect=9994")), vec![9994])] + #[case(("NODE_OPTIONS", Some("--require=/path --inspect-publish-uid=http --inspect")), vec![9229])] + #[case(("NODE_OPTIONS", Some("--require=/path --inspect-publish-uid=http --inspect=9994 --inspect-brk=9001")), vec![9994, 9001])] + fn detect_nodeinspector_port(#[case] env: (&str, Option<&str>), #[case] ports: Vec) { + let debugger = DebuggerType::NodeInspector; + let command = "/Path/to/node /Path/to/node/v20.17.0/bin/npx next dev"; + + assert_eq!( + debugger + .get_ports( + &command + .split_ascii_whitespace() + .map(ToString::to_string) + .collect::>(), + |name| { + if name == env.0 { + env.1.map(ToString::to_string) + } else { + None + } + } + ) + .0, + ports ) } #[test] fn debugger_ports_contain() { - assert!(DebuggerPorts::Detected(1337).contains(&"127.0.0.1:1337".parse().unwrap())); - assert!(!DebuggerPorts::Detected(1337).contains(&"127.0.0.1:1338".parse().unwrap())); - assert!(!DebuggerPorts::Detected(1337).contains(&"8.8.8.8:1337".parse().unwrap())); + assert!(DebuggerPorts::Single(1337).contains(&"127.0.0.1:1337".parse().unwrap())); + assert!(!DebuggerPorts::Single(1337).contains(&"127.0.0.1:1338".parse().unwrap())); + assert!(!DebuggerPorts::Single(1337).contains(&"8.8.8.8:1337".parse().unwrap())); assert!( DebuggerPorts::FixedRange(45000..=50000).contains(&"127.0.0.1:47888".parse().unwrap()) @@ -354,6 +560,12 @@ mod test { !DebuggerPorts::FixedRange(45000..=50000).contains(&"8.8.8.8:47888".parse().unwrap()) ); + let ports = vec![DebuggerPorts::Single(1337), DebuggerPorts::Single(1338)]; + assert!( + DebuggerPorts::Combination(ports.clone()).contains(&"127.0.0.1:1337".parse().unwrap()) + ); + assert!(!DebuggerPorts::Combination(ports).contains(&"127.0.0.1:1340".parse().unwrap())); + assert!(!DebuggerPorts::None.contains(&"127.0.0.1:1337".parse().unwrap())); } } diff --git a/mirrord/layer/src/file/hooks.rs b/mirrord/layer/src/file/hooks.rs index 30217589d08..4de1e73577f 100644 --- a/mirrord/layer/src/file/hooks.rs +++ b/mirrord/layer/src/file/hooks.rs @@ -1062,6 +1062,32 @@ pub(crate) unsafe extern "C" fn readlink_detour( }) } +/// Hook for `libc::mkdir`. +#[hook_guard_fn] +pub(crate) unsafe extern "C" fn mkdir_detour(pathname: *const c_char, mode: u32) -> c_int { + mkdir(pathname.checked_into(), mode) + .map(|()| 0) + .unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(pathname, &bypass); + FN_MKDIR(raw_path, mode) + }) +} + +/// Hook for `libc::mkdirat`. +#[hook_guard_fn] +pub(crate) unsafe extern "C" fn mkdirat_detour( + dirfd: c_int, + pathname: *const c_char, + mode: u32, +) -> c_int { + mkdirat(dirfd, pathname.checked_into(), mode) + .map(|()| 0) + .unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(pathname, &bypass); + FN_MKDIRAT(dirfd, raw_path, mode) + }) +} + /// Convenience function to setup file hooks (`x_detour`) with `frida_gum`. pub(crate) unsafe fn enable_file_hooks(hook_manager: &mut HookManager) { replace!(hook_manager, "open", open_detour, FnOpen, FN_OPEN); @@ -1136,6 +1162,16 @@ pub(crate) unsafe fn enable_file_hooks(hook_manager: &mut HookManager) { FN_READLINK ); + replace!(hook_manager, "mkdir", mkdir_detour, FnMkdir, FN_MKDIR); + + replace!( + hook_manager, + "mkdirat", + mkdirat_detour, + FnMkdirat, + FN_MKDIRAT + ); + replace!(hook_manager, "lseek", lseek_detour, FnLseek, FN_LSEEK); replace!(hook_manager, "write", write_detour, FnWrite, FN_WRITE); diff --git a/mirrord/layer/src/file/ops.rs b/mirrord/layer/src/file/ops.rs index ae914e2756c..580f8eacc8a 100644 --- a/mirrord/layer/src/file/ops.rs +++ b/mirrord/layer/src/file/ops.rs @@ -7,9 +7,9 @@ use libc::{c_char, statx, statx_timestamp}; use libc::{c_int, iovec, unlink, AT_FDCWD}; use mirrord_protocol::{ file::{ - OpenFileRequest, OpenFileResponse, OpenOptionsInternal, ReadFileResponse, - ReadLinkFileRequest, ReadLinkFileResponse, SeekFileResponse, WriteFileResponse, - XstatFsResponse, XstatResponse, + MakeDirAtRequest, MakeDirRequest, OpenFileRequest, OpenFileResponse, OpenOptionsInternal, + ReadFileResponse, ReadLinkFileRequest, ReadLinkFileResponse, SeekFileResponse, + WriteFileResponse, XstatFsResponse, XstatResponse, }, ResponseError, }; @@ -337,6 +337,56 @@ pub(crate) fn read_link(path: Detour) -> Detour { } } +#[mirrord_layer_macro::instrument(level = Level::TRACE, ret)] +pub(crate) fn mkdir(pathname: Detour, mode: u32) -> Detour<()> { + let pathname = pathname?; + + check_relative_paths!(pathname); + + let path = remap_path!(pathname); + + ensure_not_ignored!(path, false); + + let mkdir = MakeDirRequest { + pathname: path, + mode, + }; + + // `NotImplemented` error here means that the protocol doesn't support it. + match common::make_proxy_request_with_response(mkdir)? { + Ok(response) => Detour::Success(response), + Err(ResponseError::NotImplemented) => Detour::Bypass(Bypass::NotImplemented), + Err(fail) => Detour::Error(fail.into()), + } +} + +#[mirrord_layer_macro::instrument(level = Level::TRACE, ret)] +pub(crate) fn mkdirat(dirfd: RawFd, pathname: Detour, mode: u32) -> Detour<()> { + let pathname: PathBuf = pathname?; + + if pathname.is_absolute() || dirfd == AT_FDCWD { + let path = remap_path!(pathname); + mkdir(Detour::Success(path), mode) + } else { + // Relative path requires special handling, we must identify the relative part (relative to + // what). + let remote_fd = get_remote_fd(dirfd)?; + + let mkdir: MakeDirAtRequest = MakeDirAtRequest { + dirfd: remote_fd, + pathname: pathname.clone(), + mode, + }; + + // `NotImplemented` error here means that the protocol doesn't support it. + match common::make_proxy_request_with_response(mkdir)? { + Ok(response) => Detour::Success(response), + Err(ResponseError::NotImplemented) => Detour::Bypass(Bypass::NotImplemented), + Err(fail) => Detour::Error(fail.into()), + } + } +} + pub(crate) fn pwrite(local_fd: RawFd, buffer: &[u8], offset: u64) -> Detour { let remote_fd = get_remote_fd(local_fd)?; trace!("pwrite: local_fd {local_fd}"); diff --git a/mirrord/layer/src/go/linux_x64.rs b/mirrord/layer/src/go/linux_x64.rs index cd44a927702..dcabdeee4b7 100644 --- a/mirrord/layer/src/go/linux_x64.rs +++ b/mirrord/layer/src/go/linux_x64.rs @@ -341,6 +341,9 @@ unsafe extern "C" fn c_abi_syscall_handler( } libc::SYS_fstat => fstat_detour(param1 as _, param2 as _) as i64, libc::SYS_getdents64 => getdents64_detour(param1 as _, param2 as _, param3 as _) as i64, + #[cfg(all(target_os = "linux", not(target_arch = "aarch64")))] + libc::SYS_mkdir => mkdir_detour(param1 as _, param2 as _) as i64, + libc::SYS_mkdirat => mkdirat_detour(param1 as _, param2 as _, param3 as _) as i64, _ => { let (Ok(result) | Err(result)) = syscalls::syscall!( syscalls::Sysno::from(syscall as i32), diff --git a/mirrord/layer/src/go/mod.rs b/mirrord/layer/src/go/mod.rs index 63d468c3f6b..003eed8692c 100644 --- a/mirrord/layer/src/go/mod.rs +++ b/mirrord/layer/src/go/mod.rs @@ -110,6 +110,9 @@ unsafe extern "C" fn c_abi_syscall6_handler( libc::SYS_getdents64 => { getdents64_detour(param1 as _, param2 as _, param3 as _) as i64 } + #[cfg(all(target_os = "linux", not(target_arch = "aarch64")))] + libc::SYS_mkdir => mkdir_detour(param1 as _, param2 as _) as i64, + libc::SYS_mkdirat => mkdirat_detour(param1 as _, param2 as _, param3 as _) as i64, _ => { let (Ok(result) | Err(result)) = syscalls::syscall!( syscalls::Sysno::from(syscall as i32), diff --git a/mirrord/layer/tests/apps/mkdir/mkdir.c b/mirrord/layer/tests/apps/mkdir/mkdir.c new file mode 100644 index 00000000000..4253b6c089d --- /dev/null +++ b/mirrord/layer/tests/apps/mkdir/mkdir.c @@ -0,0 +1,22 @@ +#include +#include +#include +#include +#include + +/// Test `mkdir`. +/// +/// Creates a folder in the current directory. +/// +int main() +{ + char *mkdir_test_path = "/mkdir_test_path"; + int mkdir_result = mkdir(mkdir_test_path, 0777); + assert(mkdir_result == 0); + + char *mkdirat_test_path = "/mkdirat_test_path"; + int mkdirat_result = mkdirat(AT_FDCWD, mkdirat_test_path, 0777); + assert(mkdirat_result == 0); + + return 0; +} diff --git a/mirrord/layer/tests/common/mod.rs b/mirrord/layer/tests/common/mod.rs index 0469a6695bb..819dce2fe14 100644 --- a/mirrord/layer/tests/common/mod.rs +++ b/mirrord/layer/tests/common/mod.rs @@ -109,7 +109,7 @@ impl TestIntProxy { let agent_conn = AgentConnection::new_for_raw_address(fake_agent_address) .await .unwrap(); - let intproxy = IntProxy::new_with_connection(agent_conn, listener); + let intproxy = IntProxy::new_with_connection(agent_conn, listener, 0); intproxy .run(Duration::from_secs(5), Duration::from_secs(5)) .await @@ -468,6 +468,25 @@ impl TestIntProxy { .unwrap(); } + /// Makes a [`FileRequest::MakeDir`] and answers it. + pub async fn expect_make_dir(&mut self, expected_dir_name: &str, expected_mode: u32) { + // Expecting `mkdir` call with path. + assert_matches!( + self.recv().await, + ClientMessage::FileRequest(FileRequest::MakeDir( + mirrord_protocol::file::MakeDirRequest { pathname, mode } + )) if pathname.to_str().unwrap() == expected_dir_name && mode == expected_mode + ); + + // Answer `mkdir`. + self.codec + .send(DaemonMessage::File( + mirrord_protocol::FileResponse::MakeDir(Ok(())), + )) + .await + .unwrap(); + } + /// Verify that the passed message (not the next message from self.codec!) is a file read. /// Return buffer size. pub async fn expect_message_file_read(message: ClientMessage, expected_fd: u64) -> u64 { @@ -743,6 +762,7 @@ pub enum Application { RustListenPorts, Fork, ReadLink, + MakeDir, OpenFile, CIssue2055, CIssue2178, @@ -796,6 +816,7 @@ impl Application { Application::PythonFastApiHTTP | Application::PythonIssue864 => String::from("uvicorn"), Application::Fork => String::from("tests/apps/fork/out.c_test_app"), Application::ReadLink => String::from("tests/apps/readlink/out.c_test_app"), + Application::MakeDir => String::from("tests/apps/mkdir/out.c_test_app"), Application::Realpath => String::from("tests/apps/realpath/out.c_test_app"), Application::NodeHTTP | Application::NodeIssue2283 | Application::NodeIssue2807 => { String::from("node") @@ -1032,6 +1053,7 @@ impl Application { | Application::Go23FAccessAt | Application::Fork | Application::ReadLink + | Application::MakeDir | Application::Realpath | Application::RustFileOps | Application::RustIssue1123 @@ -1108,6 +1130,7 @@ impl Application { | Application::BashShebang | Application::Fork | Application::ReadLink + | Application::MakeDir | Application::Realpath | Application::Go21Issue834 | Application::Go22Issue834 diff --git a/mirrord/layer/tests/mkdir.rs b/mirrord/layer/tests/mkdir.rs new file mode 100644 index 00000000000..5f0fe3301b3 --- /dev/null +++ b/mirrord/layer/tests/mkdir.rs @@ -0,0 +1,31 @@ +#![feature(assert_matches)] +use std::{path::Path, time::Duration}; + +use rstest::rstest; + +mod common; +pub use common::*; + +/// Test for the [`libc::mkdir`] function. +#[rstest] +#[tokio::test] +#[timeout(Duration::from_secs(60))] +async fn mkdir(dylib_path: &Path) { + let application = Application::MakeDir; + + let (mut test_process, mut intproxy) = application + .start_process_with_layer(dylib_path, Default::default(), None) + .await; + + println!("waiting for file request."); + intproxy.expect_make_dir("/mkdir_test_path", 0o777).await; + + println!("waiting for file request."); + intproxy.expect_make_dir("/mkdirat_test_path", 0o777).await; + + assert_eq!(intproxy.try_recv().await, None); + + test_process.wait_assert_success().await; + test_process.assert_no_error_in_stderr().await; + test_process.assert_no_error_in_stdout().await; +} diff --git a/mirrord/protocol/Cargo.toml b/mirrord/protocol/Cargo.toml index 1a36c251030..91f44f8bde5 100644 --- a/mirrord/protocol/Cargo.toml +++ b/mirrord/protocol/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mirrord-protocol" -version = "1.12.0" +version = "1.13.0" authors.workspace = true description.workspace = true documentation.workspace = true diff --git a/mirrord/protocol/src/codec.rs b/mirrord/protocol/src/codec.rs index e2ed6532e3b..c3d0dcd6751 100644 --- a/mirrord/protocol/src/codec.rs +++ b/mirrord/protocol/src/codec.rs @@ -83,6 +83,8 @@ pub enum FileRequest { /// only. [`ReadDirRequest`]s that come from the layer are transformed into this /// batched form when the protocol version supports it. See [`READDIR_BATCH_VERSION`]. ReadDirBatch(ReadDirBatchRequest), + MakeDir(MakeDirRequest), + MakeDirAt(MakeDirAtRequest), } /// Minimal mirrord-protocol version that allows `ClientMessage::ReadyForLogs` message. @@ -145,6 +147,7 @@ pub enum FileResponse { GetDEnts64(RemoteResult), ReadLink(RemoteResult), ReadDirBatch(RemoteResult), + MakeDir(RemoteResult<()>), } /// `-agent` --> `-layer` messages. diff --git a/mirrord/protocol/src/file.rs b/mirrord/protocol/src/file.rs index e8ff8db8cd3..b2e8e3773f8 100644 --- a/mirrord/protocol/src/file.rs +++ b/mirrord/protocol/src/file.rs @@ -14,10 +14,17 @@ use bincode::{Decode, Encode}; use nix::sys::statfs::Statfs; use semver::VersionReq; +/// Minimal mirrord-protocol version that allows [`ReadLinkFileRequest`]. +pub static READLINK_VERSION: LazyLock = + LazyLock::new(|| ">=1.6.0".parse().expect("Bad Identifier")); + /// Minimal mirrord-protocol version that allows [`ReadDirBatchRequest`]. pub static READDIR_BATCH_VERSION: LazyLock = LazyLock::new(|| ">=1.9.0".parse().expect("Bad Identifier")); +pub static MKDIR_VERSION: LazyLock = + LazyLock::new(|| ">=1.13.0".parse().expect("Bad Identifier")); + /// Internal version of Metadata across operating system (macOS, Linux) /// Only mutual attributes #[derive(Encode, Decode, Debug, PartialEq, Clone, Copy, Eq, Default)] @@ -262,6 +269,19 @@ pub struct ReadLinkFileResponse { pub path: PathBuf, } +#[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] +pub struct MakeDirRequest { + pub pathname: PathBuf, + pub mode: u32, +} + +#[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] +pub struct MakeDirAtRequest { + pub dirfd: u64, + pub pathname: PathBuf, + pub mode: u32, +} + #[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct ReadLimitedFileRequest { pub remote_fd: u64, diff --git a/tests/go-e2e-dir/main.go b/tests/go-e2e-dir/main.go index 03ce58d8b9b..b608f01b53b 100644 --- a/tests/go-e2e-dir/main.go +++ b/tests/go-e2e-dir/main.go @@ -20,9 +20,16 @@ func main() { os.Exit(-1) } // `os.ReadDir` sorts the result by file name. - if dir[0].Name() != "app.py" || dir[1].Name() != "test.txt"{ + if dir[0].Name() != "app.py" || dir[1].Name() != "test.txt" { os.Exit(-1) } + + err = os.Mkdir("/app/test_mkdir", 0755) + if err != nil { + fmt.Printf("Mkdir error: %s\n", err) + os.Exit(-1) + } + // let close requests be sent for test time.Sleep(1 * time.Second) os.Exit(0) diff --git a/tests/python-e2e/ops.py b/tests/python-e2e/ops.py index d2794d95ad5..8e83271628f 100644 --- a/tests/python-e2e/ops.py +++ b/tests/python-e2e/ops.py @@ -50,6 +50,44 @@ def test_openat(self): os.close(file) os.close(dir) + def test_mkdir(self): + """ + Creates a new directory in "/tmp" and verifies if the directory exists. + """ + os.mkdir("/tmp/test_mkdir") + self.assertTrue(os.path.isdir("/tmp/test_mkdir")) + + def test_mkdirat(self): + """ + Creates a new directory in "/tmp" using mkdirat given the directory file descriptor for "/tmp" and verifies if the directory exists. + """ + dir = os.open( + "/tmp", os.O_RDONLY | os.O_NONBLOCK | os.O_CLOEXEC | os.O_DIRECTORY + ) + os.mkdir("test_mkdirat", dir_fd=dir) + self.assertTrue(os.path.isdir("/tmp/test_mkdirat")) + os.close(dir) + + def test_mkdir_errors(self): + """ + Test different mkdir/mkdirat errors + """ + with self.assertRaises(FileNotFoundError): + os.mkdir("/tmp/test_mkdir_error/test_mkdir") + + os.mkdir("/tmp/test_mkdir_error_already_exists") + + with self.assertRaises(FileExistsError): + os.mkdir("/tmp/test_mkdir_error_already_exists") + + dir = os.open( + "/tmp", os.O_RDONLY | os.O_NONBLOCK | os.O_CLOEXEC | os.O_DIRECTORY + ) + with self.assertRaises(FileExistsError): + os.mkdir("test_mkdir_error_already_exists", dir_fd=dir) + + os.close(dir) + def _create_new_tmp_file(self): """ Creates a new file in /tmp and returns the path and name of the file. diff --git a/tests/src/operator/policies.rs b/tests/src/operator/policies.rs index 215bdc0e455..829157d5d02 100644 --- a/tests/src/operator/policies.rs +++ b/tests/src/operator/policies.rs @@ -7,7 +7,10 @@ use std::{collections::BTreeMap, time::Duration}; use kube::Api; use mirrord_operator::crd::{ label_selector::LabelSelector, - policy::{BlockedFeature, MirrordPolicy, MirrordPolicySpec}, + policy::{ + BlockedFeature, MirrordClusterPolicy, MirrordClusterPolicySpec, MirrordPolicy, + MirrordPolicySpec, + }, }; use rstest::{fixture, rstest}; @@ -21,7 +24,11 @@ struct PolicyGuard { } impl PolicyGuard { - pub async fn new(kube_client: kube::Client, policy: &MirrordPolicy, namespace: &str) -> Self { + pub async fn namespaced( + kube_client: kube::Client, + policy: &MirrordPolicy, + namespace: &str, + ) -> Self { let policy_api: Api = Api::namespaced(kube_client.clone(), namespace); PolicyGuard { _inner: ResourceGuard::create( @@ -34,6 +41,20 @@ impl PolicyGuard { .expect("Could not create policy in E2E test."), } } + + pub async fn clusterwide(kube_client: kube::Client, policy: &MirrordClusterPolicy) -> Self { + let policy_api: Api = Api::all(kube_client.clone()); + PolicyGuard { + _inner: ResourceGuard::create( + policy_api, + policy.metadata.name.clone().unwrap(), + policy, + true, + ) + .await + .expect("Could not create policy in E2E test."), + } + } } enum CanSteal { @@ -218,9 +239,9 @@ fn block_steal_with_unmatching_policy() -> PolicyTestCase { } } -/// Assert that stealing failed due to policy if `expected_success` else succeeded. +/// Assert that port subscription failed due to policy if `expected_success`, else succeeded. /// This function should be timed out - caller's responsibility. -async fn assert_steal_result(mut test_proc: TestProcess, expected_success: bool) { +async fn assert_subscription_result(mut test_proc: TestProcess, expected_success: bool) { if expected_success { test_proc .wait_for_line(Duration::from_secs(40), "daemon subscribed") @@ -259,7 +280,7 @@ async fn run_mirrord_and_verify_steal_result( ) .await; - assert_steal_result(test_proc, can_steal.should_work_without_filter()).await; + assert_subscription_result(test_proc, can_steal.should_work_without_filter()).await; let mut config_path = config_dir().clone(); config_path.push("http_filter_header.json"); @@ -273,7 +294,7 @@ async fn run_mirrord_and_verify_steal_result( ) .await; - assert_steal_result(test_proc, can_steal.should_work_with_filter()).await; + assert_subscription_result(test_proc, can_steal.should_work_with_filter()).await; } /// Set a policy, try to steal both with and without a filter from two services, verify subscribing @@ -297,7 +318,7 @@ pub async fn create_policy_and_try_to_steal( // Create policy, delete it when test exits. let _policy_guard = - PolicyGuard::new(kube_client, &test_case.policy, &service_a.namespace).await; + PolicyGuard::namespaced(kube_client, &test_case.policy, &service_a.namespace).await; println!("Running mirrord against service a"); run_mirrord_and_verify_steal_result( @@ -314,3 +335,50 @@ pub async fn create_policy_and_try_to_steal( ) .await; } + +async fn run_mirrord_and_verify_mirror_result(kube_service: &KubeService, expected_success: bool) { + let application = Application::Go21HTTP; + + let test_proc = application + .run( + &kube_service.target, + Some(&kube_service.namespace), + Some(vec!["--fs-mode=local"]), + None, + ) + .await; + + assert_subscription_result(test_proc, expected_success).await; +} + +#[rstest] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[timeout(Duration::from_secs(60))] +pub async fn create_cluster_policy_and_try_to_mirror( + #[future] services: (KubeService, KubeService), + #[future] kube_client: kube::Client, +) { + let kube_client = kube_client.await; + let (service_a, service_b) = services.await; + + // Create policy, delete it when test exits. + let _policy_guard = PolicyGuard::clusterwide( + kube_client, + &MirrordClusterPolicy::new( + "e2e-test-block-mirror-with-path-pattern", + MirrordClusterPolicySpec { + target_path: Some("*-service-a*".into()), + selector: None, + block: vec![BlockedFeature::Mirror], + }, + ), + ) + .await; + + println!("Running mirrord against service a"); + run_mirrord_and_verify_mirror_result(&service_a, false).await; + println!("Running mirrord against service b"); + run_mirrord_and_verify_mirror_result(&service_b, true).await; + println!("Running stealing against service a"); + run_mirrord_and_verify_steal_result(&service_a, false, CanSteal::EvenWithoutFilter).await; +}