feat: cache sources too #1226

Merged: 16 commits, Dec 4, 2024
src/build.rs: 14 changes (8 additions, 6 deletions)
@@ -114,12 +114,14 @@ pub async fn run_build(

     let directories = output.build_configuration.directories.clone();
 
-    let output = output
-        .fetch_sources(tool_configuration)
-        .await
-        .into_diagnostic()?;
-
-    let output = output.build_or_fetch_cache(tool_configuration).await?;
+    let output = if output.recipe.cache.is_some() {
+        output.build_or_fetch_cache(tool_configuration).await?
+    } else {
+        output
+            .fetch_sources(tool_configuration)
+            .await
+            .into_diagnostic()?
+    };
 
     let output = output
         .resolve_dependencies(tool_configuration)
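Editor's sketch (not part of the diff): the hunk above inverts the top-level flow. When the recipe declares a `cache` section, `build_or_fetch_cache` now owns source fetching so the sources can be recorded in, and restored from, the cache; recipes without a cache section fetch sources directly as before. A minimal self-contained model of the dispatch, with hypothetical stand-in types:

struct Recipe {
    cache: Option<CacheSection>, // stand-in for the recipe's top-level `cache` section
}
struct CacheSection;

// Mirrors the branch in `run_build`: cache recipes defer fetching to the cache pipeline.
fn next_step(recipe: &Recipe) -> &'static str {
    if recipe.cache.is_some() {
        "build_or_fetch_cache (fetches sources itself)"
    } else {
        "fetch_sources, then build directly"
    }
}

fn main() {
    assert_eq!(
        next_step(&Recipe { cache: Some(CacheSection) }),
        "build_or_fetch_cache (fetches sources itself)"
    );
    assert_eq!(next_step(&Recipe { cache: None }), "fetch_sources, then build directly");
}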
src/cache.rs: 235 changes (117 additions, 118 deletions)
@@ -1,25 +1,30 @@
 //! Functions to deal with the build cache
 use std::{
     collections::{BTreeMap, HashSet},
-    path::{Path, PathBuf},
+    path::PathBuf,
 };
 
 use fs_err as fs;
-use memchr::memmem;
-use memmap2::Mmap;
 use miette::{Context, IntoDiagnostic};
+use minijinja::Value;
 use serde::{Deserialize, Serialize};
 use sha2::{Digest, Sha256};
 
 use crate::{
     env_vars,
     metadata::{build_reindexed_channels, Output},
-    packaging::{contains_prefix_binary, contains_prefix_text, content_type, Files},
-    recipe::parser::{Dependency, Requirements},
+    packaging::Files,
+    recipe::{
+        parser::{Dependency, Requirements, Source},
+        Jinja,
+    },
     render::resolved_dependencies::{
         install_environments, resolve_dependencies, FinalizedDependencies,
     },
-    source::copy_dir::{copy_file, create_symlink, CopyOptions},
+    source::{
+        copy_dir::{copy_file, CopyDir, CopyOptions},
+        fetch_sources,
+    },
 };
 
 /// Error type for cache key generation
@@ -39,10 +44,19 @@ pub enum CacheKeyError {
 pub struct Cache {
     /// The requirements that were used to build the cache
     pub requirements: Requirements,
+
     /// The finalized dependencies
     pub finalized_dependencies: FinalizedDependencies,
+
+    /// The finalized sources
+    pub finalized_sources: Vec<Source>,
+
     /// The prefix files that are included in the cache
-    pub prefix_files: Vec<(PathBuf, bool)>,
+    pub prefix_files: Vec<PathBuf>,
+
+    /// The (dirty) source files that are included in the cache
+    pub work_dir_files: Vec<PathBuf>,
+
     /// The prefix that was used at build time (needs to be replaced when
     /// restoring the files)
     pub prefix: PathBuf,
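Example (editor's sketch, not part of the diff): this struct is persisted as cache.json inside the cache directory. A simplified round trip with a hypothetical stand-in record; the None fallback mirrors the behavior added further down, where an unreadable cache.json is treated as a cache miss and triggers a rebuild:

use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};

// Simplified stand-in for the real `Cache` struct above (illustration only).
#[derive(Serialize, Deserialize)]
struct CacheRecord {
    prefix_files: Vec<PathBuf>,
    work_dir_files: Vec<PathBuf>,
    prefix: PathBuf,
}

fn save_cache(dir: &Path, cache: &CacheRecord) -> std::io::Result<()> {
    // serde_json::Error converts into io::Error, so `?` works here
    let json = serde_json::to_string(cache)?;
    std::fs::write(dir.join("cache.json"), json)
}

// A missing or corrupt cache.json is simply a cache miss.
fn load_cache(dir: &Path) -> Option<CacheRecord> {
    let text = std::fs::read_to_string(dir.join("cache.json")).ok()?;
    serde_json::from_str(&text).ok()
}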
@@ -88,74 +102,62 @@ impl Output {
                 self.build_configuration.build_platform.platform.to_string(),
             );
 
-            let cache_key = (cache, selected_variant);
+            let cache_key = (cache, selected_variant, self.prefix());
             // serialize to json and hash
             let mut hasher = Sha256::new();
-            let serialized = serde_json::to_string(&cache_key)?;
-            hasher.update(serialized.as_bytes());
-            let result = hasher.finalize();
-            Ok(format!("{:x}", result))
+            cache_key.serialize(&mut serde_json::Serializer::new(&mut hasher))?;
+            Ok(format!("{:x}", hasher.finalize()))
         } else {
             Err(CacheKeyError::NoCacheKeyAvailable)
        }
    }

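Example (editor's sketch, not part of the diff): the rewritten cache_key above streams the JSON serialization directly into the hasher instead of materializing an intermediate String. This works because sha2's Sha256 implements std::io::Write (with the crate's default std feature), which is the writer serde_json::Serializer::new expects. Note also that the hashed tuple now includes self.prefix(), so changing the build prefix invalidates the key. The pattern in isolation:

use serde::Serialize;
use sha2::{Digest, Sha256};

// Hash any serializable value without building an intermediate JSON string.
fn hash_json<T: Serialize>(value: &T) -> Result<String, serde_json::Error> {
    let mut hasher = Sha256::new();
    value.serialize(&mut serde_json::Serializer::new(&mut hasher))?;
    Ok(format!("{:x}", hasher.finalize()))
}

fn main() {
    let key = ("cache-section", "selected-variant", "/path/to/prefix");
    println!("bld_{}", hash_json(&key).unwrap());
}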
     /// Restore an existing cache from a cache directory
-    async fn restore_cache(&self, cache_dir: PathBuf) -> Result<Output, miette::Error> {
-        let cache: Cache = serde_json::from_str(
-            &fs::read_to_string(cache_dir.join("cache.json")).into_diagnostic()?,
-        )
-        .into_diagnostic()?;
-
-        let copy_options = CopyOptions {
-            skip_exist: true,
-            ..Default::default()
-        };
-        let cache_prefix = cache.prefix;
-
-        let mut paths_created = HashSet::new();
-        for (file, has_prefix) in &cache.prefix_files {
-            tracing::info!("Restoring from cache: {:?}", file);
-            let dest = self.prefix().join(file);
-            let source = &cache_dir.join("prefix").join(file);
-            copy_file(source, &dest, &mut paths_created, &copy_options).into_diagnostic()?;
-
-            // check if the symlink starts with the old prefix, and if yes, make the symlink
-            // absolute with the new prefix
-            if source.is_symlink() {
-                let symlink_target = fs::read_link(source).into_diagnostic()?;
-                if let Ok(rest) = symlink_target.strip_prefix(&cache_prefix) {
-                    let new_symlink_target = self.prefix().join(rest);
-                    fs::remove_file(&dest).into_diagnostic()?;
-                    create_symlink(&new_symlink_target, &dest).into_diagnostic()?;
-                }
-            }
-
-            if *has_prefix {
-                replace_prefix(&dest, &cache_prefix, self.prefix())?;
-            }
-        }
+    async fn restore_cache(
+        &self,
+        cache: Cache,
+        cache_dir: PathBuf,
+    ) -> Result<Output, miette::Error> {
+        let cache_prefix_dir = cache_dir.join("prefix");
+        let copied_prefix = CopyDir::new(&cache_prefix_dir, self.prefix())
+            .run()
+            .into_diagnostic()?;
+
+        // restore the work dir files
+        let cache_dir_work = cache_dir.join("work_dir");
+        let copied_cache = CopyDir::new(
+            &cache_dir_work,
+            &self.build_configuration.directories.work_dir,
+        )
+        .run()
+        .into_diagnostic()?;
+
+        let combined_files = copied_prefix.copied_paths().len() + copied_cache.copied_paths().len();
+        tracing::info!(
+            "Restored {} source and prefix files from cache",
+            combined_files
+        );
 
         Ok(Output {
             finalized_cache_dependencies: Some(cache.finalized_dependencies.clone()),
+            finalized_cache_sources: Some(cache.finalized_sources.clone()),
             ..self.clone()
         })
     }

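Example (editor's sketch, not part of the diff): restore_cache now copies whole directory trees (prefix and work_dir) back into place with the crate-internal CopyDir builder instead of walking a per-file list. A rough standard-library approximation of what CopyDir::new(from, to).run() does here; the real implementation also handles symlinks, permissions, and filtering:

use std::path::{Path, PathBuf};

// Recursively copy `from` into `to`, returning the destination paths,
// so the caller can report how many files were restored.
fn copy_dir_all(from: &Path, to: &Path) -> std::io::Result<Vec<PathBuf>> {
    let mut copied = Vec::new();
    std::fs::create_dir_all(to)?;
    for entry in std::fs::read_dir(from)? {
        let entry = entry?;
        let dest = to.join(entry.file_name());
        if entry.file_type()?.is_dir() {
            copied.extend(copy_dir_all(&entry.path(), &dest)?);
        } else {
            std::fs::copy(entry.path(), &dest)?;
            copied.push(dest);
        }
    }
    Ok(copied)
}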
     /// This will fetch sources and build the cache if it doesn't exist
     /// Note: this modifies the output in place
     pub(crate) async fn build_or_fetch_cache(
-        &self,
+        mut self,
         tool_configuration: &crate::tool_configuration::Configuration,
     ) -> Result<Self, miette::Error> {
-        // if we don't have a cache, we need to run the cache build with our current
-        // workdir, and then return the cache
-        let span = tracing::info_span!("Running cache build");
-        let _enter = span.enter();
-
-        let target_platform = self.build_configuration.target_platform;
-        let mut env_vars = env_vars::vars(self, "BUILD");
-        env_vars.extend(env_vars::os_vars(self.prefix(), &target_platform));
-
-        if let Some(cache) = self.recipe.cache.clone() {
-            tracing::info!("Cache key: {:?}", self.cache_key().into_diagnostic()?);
+        if let Some(cache) = &self.recipe.cache {
+            // if we don't have a cache, we need to run the cache build with our current
+            // workdir, and then return the cache
+            let span = tracing::info_span!("Running cache build");
+            let _enter = span.enter();
+
+            tracing::info!("using cache key: {:?}", self.cache_key().into_diagnostic()?);
             let cache_key = format!("bld_{}", self.cache_key().into_diagnostic()?);
 
             let cache_dir = self
@@ -166,24 +168,66 @@

// restore the cache if it exists by copying the files to the prefix
if cache_dir.exists() {
tracing::info!("Restoring cache from {:?}", cache_dir);
return self.restore_cache(cache_dir).await;
let text = fs::read_to_string(cache_dir.join("cache.json")).into_diagnostic()?;
match serde_json::from_str::<Cache>(&text) {
Ok(cache) => {
tracing::info!("Restoring cache from {:?}", cache_dir);
self = self
.fetch_sources(tool_configuration)
.await
.into_diagnostic()?;
return self.restore_cache(cache, cache_dir).await;
}
Err(e) => {
tracing::error!(
"Failed to parse cache at {}: {:?} - rebuilding",
cache_dir.join("cache.json").display(),
e
);
// remove the cache dir and run as normal
fs::remove_dir_all(&cache_dir).into_diagnostic()?;
}
}
}

// fetch the sources for the `cache` section
let rendered_sources = fetch_sources(
self.finalized_cache_sources
.as_ref()
.unwrap_or(&cache.source),
&self.build_configuration.directories,
&self.system_tools,
tool_configuration,
)
.await
.into_diagnostic()?;

let target_platform = self.build_configuration.target_platform;
let mut env_vars = env_vars::vars(&self, "BUILD");
env_vars.extend(env_vars::os_vars(self.prefix(), &target_platform));

// Reindex the channels
let channels = build_reindexed_channels(&self.build_configuration, tool_configuration)
.into_diagnostic()
.context("failed to reindex output channel")?;

let finalized_dependencies =
resolve_dependencies(&cache.requirements, self, &channels, tool_configuration)
resolve_dependencies(&cache.requirements, &self, &channels, tool_configuration)
.await
.unwrap();

install_environments(self, &finalized_dependencies, tool_configuration)
install_environments(&self, &finalized_dependencies, tool_configuration)
.await
.into_diagnostic()?;

let selector_config = self.build_configuration.selector_config();
let mut jinja = Jinja::new(selector_config.clone());
for (k, v) in self.recipe.context.iter() {
jinja
.context_mut()
.insert(k.clone(), Value::from_safe_string(v.clone()));
}

cache
.build
.script()
@@ -193,7 +237,7 @@
                     &self.build_configuration.directories.recipe_dir,
                     &self.build_configuration.directories.host_prefix,
                     Some(&self.build_configuration.directories.build_prefix),
-                    None, // TODO fix this to be proper Jinja context
+                    Some(jinja),
                 )
                 .await
                 .into_diagnostic()?;
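Example (editor's sketch, not part of the diff): the cache build script previously ran with None where a Jinja context was expected (the removed TODO above); it now receives a context seeded from the recipe's `context` section. Roughly the idea, shown with plain minijinja rather than the crate's Jinja wrapper:

use minijinja::{context, Environment};

fn main() {
    let mut env = Environment::new();
    env.add_template("script", "echo building {{ name }}-{{ version }}")
        .unwrap();
    let rendered = env
        .get_template("script")
        .unwrap()
        .render(context! { name => "mypkg", version => "1.2.3" })
        .unwrap();
    assert_eq!(rendered, "echo building mypkg-1.2.3");
}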
@@ -213,6 +257,7 @@
             let mut creation_cache = HashSet::new();
             let mut copied_files = Vec::new();
             let copy_options = CopyOptions::default();
+
             for file in &new_files.new_files {
                 // skip directories (if they are not a symlink)
                 // directories are implicitly created by the files
@@ -224,28 +269,24 @@
                     .expect("File should be in prefix");
                 let dest = &prefix_cache_dir.join(stripped);
                 copy_file(file, dest, &mut creation_cache, &copy_options).into_diagnostic()?;
-
-                // Defend against broken symlinks here!
-                if !file.is_symlink() {
-                    // check if the file contains the prefix
-                    let content_type = content_type(file).into_diagnostic()?;
-                    let has_prefix = if content_type.map(|c| c.is_text()).unwrap_or(false) {
-                        contains_prefix_text(file, self.prefix(), self.target_platform())
-                    } else {
-                        contains_prefix_binary(file, self.prefix())
-                    }
-                    .into_diagnostic()?;
-                    copied_files.push((stripped.to_path_buf(), has_prefix));
-                } else {
-                    copied_files.push((stripped.to_path_buf(), false));
-                }
+                copied_files.push(stripped.to_path_buf());
             }
 
+            // We also need to copy the work dir files to the cache
+            let work_dir_files = CopyDir::new(
+                &self.build_configuration.directories.work_dir.clone(),
+                &cache_dir.join("work_dir"),
+            )
+            .run()
+            .into_diagnostic()?;
+
             // save the cache
             let cache = Cache {
                 requirements: cache.requirements.clone(),
                 finalized_dependencies: finalized_dependencies.clone(),
+                finalized_sources: rendered_sources.clone(),
                 prefix_files: copied_files,
+                work_dir_files: work_dir_files.copied_paths().to_vec(),
                 prefix: self.prefix().to_path_buf(),
             };

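Example (editor's sketch, not part of the diff): with the per-file prefix-detection bookkeeping removed from this path (see the deleted replace_prefix below), the cache now records plain prefix-relative paths instead of (path, has_prefix) tuples. A hypothetical helper distilling the loop above:

use std::path::{Path, PathBuf};

// Record files by their path relative to the build prefix, skipping plain
// directories (they are implied by the files inside them).
fn relative_prefix_files(files: &[PathBuf], prefix: &Path) -> Vec<PathBuf> {
    files
        .iter()
        .filter(|f| !f.is_dir() || f.is_symlink())
        .filter_map(|f| f.strip_prefix(prefix).ok())
        .map(Path::to_path_buf)
        .collect()
}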
@@ -254,53 +295,11 @@
 
             Ok(Output {
                 finalized_cache_dependencies: Some(finalized_dependencies),
-                ..self.clone()
+                finalized_cache_sources: Some(rendered_sources),
+                ..self
             })
         } else {
-            Ok(self.clone())
+            Ok(self)
         }
     }
 }
-
-/// Simple replace prefix function that does a direct replacement without any
-/// padding considerations because we know that the prefix is the same length as
-/// the original prefix.
-fn replace_prefix(file: &Path, old_prefix: &Path, new_prefix: &Path) -> Result<(), miette::Error> {
-    // mmap the file, and use the fast string search to find the prefix
-    let output = {
-        let map_file = fs::File::open(file).into_diagnostic()?;
-        let mmap = unsafe { Mmap::map(&map_file).into_diagnostic()? };
-        let new_prefix_bytes = new_prefix.as_os_str().as_encoded_bytes();
-        let old_prefix_bytes = old_prefix.as_os_str().as_encoded_bytes();
-
-        // if the prefix is the same, we don't need to do anything
-        if old_prefix == new_prefix {
-            return Ok(());
-        }
-
-        assert_eq!(
-            new_prefix_bytes.len(),
-            old_prefix_bytes.len(),
-            "Prefixes must have the same length: {:?} != {:?}",
-            new_prefix,
-            old_prefix
-        );
-
-        let mut output = Vec::with_capacity(mmap.len());
-        let mut last_match_end = 0;
-        let finder = memmem::Finder::new(old_prefix_bytes);
-
-        while let Some(index) = finder.find(&mmap[last_match_end..]) {
-            let absolute_index = last_match_end + index;
-            output.extend_from_slice(&mmap[last_match_end..absolute_index]);
-            output.extend_from_slice(new_prefix_bytes);
-            last_match_end = absolute_index + new_prefix_bytes.len();
-        }
-        output.extend_from_slice(&mmap[last_match_end..]);
-        output
-        // close file & mmap at end of scope
-    };
-
-    // overwrite the file
-    fs::write(file, output).into_diagnostic()
-}