Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle many packs better #1750

Merged
merged 2 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gix-odb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ pub struct Store {

/// The below state acts like a slot-map with each slot is mutable when the write lock is held, but readable independently of it.
/// This allows multiple files to be loaded concurrently if there are multiple handles requesting to load packs or additional indices.
/// The map is static and cannot typically change.
/// The map is static and cannot change.
/// It's read often and changed rarely.
pub(crate) files: Vec<types::MutableIndexAndPack>,

Expand Down
12 changes: 9 additions & 3 deletions gix-odb/src/store_impls/dynamic/load_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ impl super::Store {
Option::as_ref(&files_guard).expect("slot is set or we wouldn't know it points to this file");
if index_info.is_multi_index() && files.mtime() != mtime {
// we have a changed multi-pack index. We can't just change the existing slot as it may alter slot indices
// that are currently available. Instead we have to move what's there into a new slot, along with the changes,
// that are currently available. Instead, we have to move what's there into a new slot, along with the changes,
// and later free the slot or dispose of the index in the slot (like we do for removed/missing files).
index_paths_to_add.push_back((index_info, mtime, Some(slot_idx)));
// If the current slot is loaded, the soon-to-be copied multi-index path will be loaded as well.
Expand Down Expand Up @@ -304,6 +304,12 @@ impl super::Store {
needed: index_paths_to_add.len() + 1, /*the one currently popped off*/
});
}
// Don't allow duplicate indices, we need a 1:1 mapping.
if new_slot_map_indices.contains(&next_possibly_free_index) {
next_possibly_free_index = (next_possibly_free_index + 1) % self.files.len();
num_indices_checked += 1;
continue 'increment_slot_index;
}
let slot_index = next_possibly_free_index;
let slot = &self.files[slot_index];
next_possibly_free_index = (next_possibly_free_index + 1) % self.files.len();
Expand Down Expand Up @@ -502,7 +508,7 @@ impl super::Store {
}
// Unlike libgit2, do not sort by modification date, but by size and put the biggest indices first. That way
// the chance to hit an object should be higher. We leave it to the handle to sort by LRU.
// Git itself doesn't change the order which may safe time, but we want it to be stable which also helps some tests.
// Git itself doesn't change the order which may save time, but we want it to be stable which also helps some tests.
// NOTE: this will work well for well-packed repos or those using geometric repacking, but force us to open a lot
// of files when dealing with new objects, as there is no notion of recency here as would be with unmaintained
// repositories. Different algorithms should be provided, like newest packs first, and possibly a mix of both
Expand All @@ -512,7 +518,7 @@ impl super::Store {
Ok(indices_by_modification_time)
}

/// returns Ok<dest slot was empty> if the copy could happen because dest-slot was actually free or disposable , and Some(true) if it was empty
/// returns `Ok(dest_slot_was_empty)` if the copy could happen because dest-slot was actually free or disposable.
#[allow(clippy::too_many_arguments)]
fn try_set_index_slot(
lock: &parking_lot::MutexGuard<'_, ()>,
Expand Down
8 changes: 8 additions & 0 deletions gix/src/commit.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
//!
#![allow(clippy::empty_docs)]

use std::convert::Infallible;

/// An empty array of a type usable with the `gix::easy` API to help declare that no parents should be used
pub const NO_PARENT_IDS: [gix_hash::ObjectId; 0] = [];

Expand All @@ -22,6 +24,12 @@ pub enum Error {
ReferenceEdit(#[from] crate::reference::edit::Error),
}

/// Allow `?` on expressions of type `Result<_, Infallible>` inside functions that
/// return this `Error`. Since `Infallible` has no values, this conversion can never
/// actually execute at runtime.
impl From<std::convert::Infallible> for Error {
    fn from(_value: Infallible) -> Self {
        // `Infallible` is an empty enum, so this body is provably unreachable.
        unreachable!("cannot be invoked")
    }
}

///
#[cfg(feature = "revision")]
pub mod describe {
Expand Down
2 changes: 2 additions & 0 deletions gix/src/remote/connection/fetch/update_refs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ pub(crate) fn update(
let update = if is_implicit_tag {
Mode::ImplicitTagNotSentByRemote.into()
} else {
// Assure the ODB is not to blame for the missing object.
repo.try_find_object(remote_id)?;
Mode::RejectedSourceObjectNotFound { id: remote_id.into() }.into()
};
updates.push(update);
Expand Down
2 changes: 2 additions & 0 deletions gix/src/remote/connection/fetch/update_refs/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ mod error {
PeelToId(#[from] crate::reference::peel::Error),
#[error("Failed to follow a symbolic reference to assure worktree isn't affected")]
FollowSymref(#[from] gix_ref::file::find::existing::Error),
#[error(transparent)]
FindObject(#[from] crate::object::find::Error),
}
}

Expand Down
68 changes: 68 additions & 0 deletions gix/tests/gix/remote/fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,74 @@ mod blocking_and_async_io {
try_repo_rw(name).unwrap()
}

// Regression test: repeatedly fetch new packs from a remote into a local ODB whose
// slot map was deliberately created with very few slots (1..=3), to exercise the
// store's behavior when more packs arrive than the slot map can hold.
#[test]
#[cfg(feature = "blocking-network-client")]
fn fetch_more_packs_than_can_be_handled() -> gix_testtools::Result {
use gix::config::tree::User;
use gix::interrupt::IS_INTERRUPTED;
use gix_odb::store::init::Slots;
use gix_testtools::tempfile;
// Create one empty-tree commit on the current head branch of `repo`,
// using the previous head commit (if any) as the sole parent.
fn create_empty_commit(repo: &gix::Repository) -> anyhow::Result<()> {
let name = repo.head_name()?.expect("no detached head");
repo.commit(
name.as_bstr(),
"empty",
gix::hash::ObjectId::empty_tree(repo.object_hash()),
repo.try_find_reference(name.as_ref())?.map(|r| r.id()),
)?;
Ok(())
}
// Run the scenario with progressively larger (but still tiny) slot maps.
for max_packs in 1..=3 {
// Set up a bare remote repository with a configured author so commits can be created.
let remote_dir = tempfile::tempdir()?;
let mut remote_repo = gix::init_bare(remote_dir.path())?;
{
let mut config = remote_repo.config_snapshot_mut();
config.set_value(&User::NAME, "author")?;
config.set_value(&User::EMAIL, "[email protected]")?;
}
create_empty_commit(&remote_repo)?;

// Clone into a bare local repository whose object store is capped at `max_packs` slots.
let local_dir = tempfile::tempdir()?;
let (local_repo, _) = gix::clone::PrepareFetch::new(
remote_repo.path(),
local_dir.path(),
gix::create::Kind::Bare,
Default::default(),
gix::open::Options::isolated().object_store_slots(Slots::Given(max_packs)),
)?
.fetch_only(gix::progress::Discard, &IS_INTERRUPTED)?;

let remote = local_repo
.branch_remote(
local_repo.head_ref()?.expect("branch available").name().shorten(),
Fetch,
)
.expect("remote is configured after clone")?;
// Each round creates one new commit on the remote and fetches it, so every fetch
// can produce a new pack — eventually exceeding the tiny slot map.
for _round_to_create_pack in 1..12 {
create_empty_commit(&remote_repo)?;
match remote
.connect(Fetch)?
.prepare_fetch(gix::progress::Discard, Default::default())?
.receive(gix::progress::Discard, &IS_INTERRUPTED)
{
Ok(out) => {
// On success, every fetched object must be findable locally,
// proving that pack refreshes were triggered as needed.
for local_tracking_branch_name in out.ref_map.mappings.into_iter().filter_map(|m| m.local) {
let r = local_repo.find_reference(&local_tracking_branch_name)?;
r.id()
.object()
.expect("object should be present after fetching, triggering pack refreshes works");
local_repo.head_ref()?.unwrap().set_target_id(r.id(), "post fetch")?;
}
}
// On failure, the only acceptable error is the explicit slot-map-exhausted message.
Err(err) => assert!(err
.to_string()
.starts_with("The slotmap turned out to be too small with ")),
}
}
}
Ok(())
}

#[test]
#[cfg(feature = "blocking-network-client")]
#[allow(clippy::result_large_err)]
Expand Down
Loading