diff --git a/README.md b/README.md index 44a0d34..81a030e 100644 --- a/README.md +++ b/README.md @@ -94,3 +94,25 @@ The other options are: > The `separate` option also writes a `layers.json` file in the target directory, > which is a JSON-encoded array of layer directory names. > This array specifies the order of layer application in the image. + +## troubleshooting + +Set `RUST_LOG=debug` to get more detailed logs, and `RUST_LOG=trace` to get extremely detailed logs. +You can also filter logs to a specific module (such as `circe` or `circe_lib`) +by setting `RUST_LOG=circe=debug` or `RUST_LOG=circe_lib=debug`. + +> [!TIP] +> On macOS and Linux, you can set environment variables for a single command without changing your environment; +> for example: `RUST_LOG=trace circe ...`. + +### future improvements + +These are somewhat "known issues", but mostly "things to keep in mind" when using `circe`. +Ideally we'll fix these in the future; feel free to make a contribution or open an issue letting us know if one of these is blocking you. + +- [ ] `circe` does not currently download layers concurrently. + Since network transfer is effectively always the bottleneck, adding concurrent downloads would likely speed up `circe` significantly. + That being said, as of our tests today, `circe` is already about as fast as `docker pull && docker save`. +- [ ] symlinks are unpacked with the same destination as written in the actual container. + This means e.g. they can link to files outside of the output directory + (the example case I found was files in `usr/bin`, linking to `/bin/`). 
diff --git a/bin/src/extract.rs b/bin/src/extract.rs index d78bfe6..535c426 100644 --- a/bin/src/extract.rs +++ b/bin/src/extract.rs @@ -2,8 +2,7 @@ use circe_lib::{registry::Registry, LayerDescriptor, Platform, Reference}; use clap::{Parser, ValueEnum}; use color_eyre::eyre::{bail, Context, Result}; use std::{path::PathBuf, str::FromStr}; -use tap::Pipe; -use tracing::info; +use tracing::{debug, info}; #[derive(Debug, Parser)] pub struct Options { @@ -63,7 +62,7 @@ pub enum Mode { #[tracing::instrument] pub async fn main(opts: Options) -> Result<()> { - info!("Extracting image"); + info!("extracting image"); let output = canonicalize_output_dir(&opts.output_dir, opts.overwrite)?; let registry = Registry::builder() @@ -92,6 +91,7 @@ async fn squash( info!("enumerated {count} {}", plural(count, "layer", "layers")); for (descriptor, layer) in layers.zip(1usize..) { + debug!(?descriptor, layer, count, "applying layer"); if count > 0 { info!(layer = %descriptor, "applying layer {layer} of {count}"); } else { @@ -104,6 +104,7 @@ async fn squash( .with_context(|| format!("apply layer {descriptor} to {output:?}"))?; } + info!("finished applying layers"); Ok(()) } @@ -117,6 +118,7 @@ async fn separate( info!("enumerated {count} {}", plural(count, "layer", "layers")); for (descriptor, layer) in layers.iter().zip(1usize..) 
{ + debug!(?descriptor, layer, count, "applying layer"); let output = output.join(descriptor.digest.as_hex()); if count > 0 { info!(layer = %descriptor, "applying layer {layer} of {count}"); @@ -130,16 +132,19 @@ async fn separate( .with_context(|| format!("apply layer {descriptor} to {output:?}"))?; } + info!("finished applying layers"); + let index_destination = output.join("layers.json"); let index = layers .into_iter() .map(|l| l.digest.as_hex()) - .collect::<Vec<_>>() - .pipe_ref(serde_json::to_string_pretty) - .context("serialize layer index")?; + .collect::<Vec<_>>(); - tokio::fs::write(output.join("layers.json"), index) + debug!(?index, ?index_destination, "serializing layer index"); + let index = serde_json::to_string_pretty(&index).context("serialize layer index")?; + tokio::fs::write(&index_destination, index) .await .context("write layer index") + .inspect(|_| info!(path = ?index_destination, "layer index written")) } /// Given a (probably relative) path to a directory, canonicalize it to an absolute path. diff --git a/bin/src/main.rs b/bin/src/main.rs index 0428c96..bf8ead8 100644 --- a/bin/src/main.rs +++ b/bin/src/main.rs @@ -27,7 +27,8 @@ async fn main() -> Result<()> { tracing_tree::HierarchicalLayer::default() .with_indent_lines(true) .with_indent_amount(2) - .with_thread_ids(true) + .with_thread_ids(false) + .with_thread_names(false) .with_verbose_exit(false) .with_verbose_entry(false) .with_deferred_spans(true) diff --git a/lib/src/registry.rs b/lib/src/registry.rs index 031d936..10b83ba 100644 --- a/lib/src/registry.rs +++ b/lib/src/registry.rs @@ -18,7 +18,7 @@ use oci_client::{ use os_str_bytes::OsStrBytesExt; use tokio_tar::Archive; use tokio_util::io::StreamReader; -use tracing::{info, warn}; +use tracing::{debug, warn}; use crate::{ ext::PriorityFind, @@ -244,10 +244,16 @@ async fn apply_tarball(stream: impl Stream + Unpin, output: &Path) // Whiteout files delete the file from the filesystem. 
if let Some(path) = is_whiteout(&path) { unwrap_warn!(tokio::fs::remove_file(&path).await, "whiteout: {path:?}"); - info!(?path, "whiteout"); + debug!(?path, "whiteout"); continue; } + // Future improvement: symlinks are unpacked with the same destination as written in the actual container; + // this means e.g. they can link to files outside of the output directory + // (the example case I found was in `usr/bin`, linking to `/bin/`). + // I don't _think_ this matters for now given how we're using this today, but it's technically incorrect. + // To fix this we need to re-implement the logic in `unpack_in` to rewrite symlink destinations. + // Otherwise, apply the file as normal. // Both _new_ and _changed_ files are handled the same way: // the layer contains the entire file content, so we just overwrite the file. @@ -256,7 +262,7 @@ async fn apply_tarball(stream: impl Stream + Unpin, output: &Path) continue; } - info!(?path, "apply"); + debug!(?path, "apply"); } Ok(())