From 137dd48cab10f98605428fc5512ff6cf9e6d6cc5 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Sun, 12 Nov 2023 20:59:59 +0900 Subject: [PATCH] Move create_git_tree to Rust --- src/cinnabar-helper.c | 297 --------------------------------------- src/cinnabar/manifest.rs | 2 + src/main.rs | 5 - src/store.rs | 167 +++++++++++++++++++--- 4 files changed, 147 insertions(+), 324 deletions(-) diff --git a/src/cinnabar-helper.c b/src/cinnabar-helper.c index ac8e5f1ff..48812f0cd 100644 --- a/src/cinnabar-helper.c +++ b/src/cinnabar-helper.c @@ -155,303 +155,6 @@ const struct object_id *repo_lookup_replace_object( return lookup_replace_object(r, oid); } -/* The git storage for a mercurial manifest used to be a commit with two - * directories at its root: - * - a git directory, matching the git tree in the git commit corresponding to - * the mercurial changeset using the manifest. - * - a hg directory, containing the same file paths, but where all pointed - * objects are commits (mode 160000 in the git tree) whose sha1 is actually - * the mercurial sha1 for the corresponding mercurial file. - * Reconstructing the mercurial manifest required file paths, mercurial sha1 - * for each file, and the corresponding attribute ("l" for symlinks, "x" for - * executables"). The hg directory alone was not enough for that, because it - * lacked the attribute information. 
- */ -static void track_tree(struct tree *tree, struct object_list **tree_list) -{ - if (tree_list) { - object_list_insert(&tree->object, tree_list); - tree->object.flags |= SEEN; - } -} - -struct manifest_tree_state { - struct tree *tree; - struct tree_desc desc; -}; - -static int manifest_tree_state_init(const struct object_id *tree_id, - struct manifest_tree_state *result, - struct object_list **tree_list) -{ - result->tree = parse_tree_indirect(tree_id); - if (!result->tree) - return -1; - track_tree(result->tree, tree_list); - - init_tree_desc(&result->desc, result->tree->buffer, - result->tree->size); - return 0; -} - -struct merge_manifest_tree_state { - struct manifest_tree_state state_a, state_b; - struct name_entry entry_a, entry_b; - struct strslice entry_a_path, entry_b_path; - int cmp; -}; - -struct merge_name_entry { - struct name_entry *entry_a, *entry_b; - struct strslice path; -}; - -static int merge_manifest_tree_state_init(const struct object_id *tree_id_a, - const struct object_id *tree_id_b, - struct merge_manifest_tree_state *result, - struct object_list **tree_list) -{ - int ret; - memset(result, 0, sizeof(*result)); - result->cmp = 0; - - if (tree_id_a) { - ret = manifest_tree_state_init(tree_id_a, &result->state_a, tree_list); - if (ret) - return ret; - } else { - result->entry_a_path = empty_strslice(); - result->cmp = 1; - } - if (tree_id_b) { - return manifest_tree_state_init(tree_id_b, &result->state_b, tree_list); - } else if (result->cmp == 0) { - result->entry_b_path = empty_strslice(); - result->cmp = -1; - return 0; - } - return 1; -} - -static int merge_tree_entry(struct merge_manifest_tree_state *state, - struct merge_name_entry *entries) -{ - if (state->cmp <= 0) { - if (tree_entry(&state->state_a.desc, &state->entry_a)) { - state->entry_a_path = strslice_from_str(state->entry_a.path); - } else { - state->entry_a_path = empty_strslice(); - } - } - if (state->cmp >= 0) { - if (tree_entry(&state->state_b.desc, &state->entry_b)) { 
- state->entry_b_path = strslice_from_str(state->entry_b.path); - } else { - state->entry_b_path = empty_strslice(); - } - } - if (!state->entry_a_path.len) { - if (!state->entry_b_path.len) - return 0; - state->cmp = 1; - } else if (!state->entry_b_path.len) { - state->cmp = -1; - } else { - state->cmp = base_name_compare( - state->entry_a_path.buf, state->entry_a_path.len, state->entry_a.mode, - state->entry_b_path.buf, state->entry_b_path.len, state->entry_b.mode); - } - if (state->cmp <= 0) { - entries->entry_a = &state->entry_a; - entries->path = state->entry_a_path; - } else { - entries->entry_a = NULL; - } - if (state->cmp >= 0) { - entries->entry_b = &state->entry_b; - entries->path = state->entry_b_path; - } else { - entries->entry_b = NULL; - } - return 1; -} - -static struct name_entry * -lazy_tree_entry_by_name(struct manifest_tree_state *state, - const struct object_id *tree_id, - const char *path) -{ - int cmp; - - if (!tree_id) - return NULL; - - if (!state->tree) { - if (manifest_tree_state_init(tree_id, state, NULL)) - return NULL; - } - - while (state->desc.size && - (cmp = strcmp(state->desc.entry.path, path)) < 0) - update_tree_entry(&state->desc); - - if (state->desc.size && cmp == 0) - return &state->desc.entry; - - return NULL; -} - -struct oid_map_entry { - struct hashmap_entry ent; - struct object_id old_oid; - struct object_id new_oid; -}; - -static int oid_map_entry_cmp(const void *cmpdata, const struct hashmap_entry *e1, - const struct hashmap_entry *e2, const void *keydata) -{ - const struct oid_map_entry *entry1 = - container_of(e1, const struct oid_map_entry, ent); - const struct oid_map_entry *entry2 = - container_of(e2, const struct oid_map_entry, ent); - - return oidcmp(&entry1->old_oid, &entry2->old_oid); -} - -static void recurse_create_git_tree(const struct object_id *tree_id, - const struct object_id *reference, - const struct object_id *merge_tree_id, - struct object_id *result, - struct hashmap *cache) -{ - struct 
oid_map_entry k, *cache_entry = NULL; - - if (!merge_tree_id) { - hashmap_entry_init(&k.ent, oidhash(tree_id)); - oidcpy(&k.old_oid, tree_id); - cache_entry = hashmap_get_entry(cache, &k, ent, NULL); - } - if (!cache_entry) { - struct merge_manifest_tree_state state; - struct manifest_tree_state ref_state = { NULL, }; - struct merge_name_entry entries; - struct strbuf tree_buf = STRBUF_INIT; - - if (merge_manifest_tree_state_init(tree_id, merge_tree_id, &state, NULL)) - goto corrupted; - - while (merge_tree_entry(&state, &entries)) { - struct object_id oid; - struct name_entry *entry = entries.entry_a ? entries.entry_a : entries.entry_b; - unsigned mode = entry->mode; - struct strslice entry_path; - struct strslice underscore = { 1, "_" }; - if (!strslice_startswith(entries.path, underscore)) - goto corrupted; - entry_path = strslice_slice(entries.path, 1, SIZE_MAX); - // In some edge cases, presumably all related to the use of - // `hg convert` before Mercurial 2.0.1, manifest trees have - // double slashes, which end up as "_" directories in the - // corresponding git cinnabar metadata. - // With further changes in the subsequent Mercurial manifests, - // those entries with double slashes are superseded with entries - // with single slash, while still being there. So to create - // the corresponding git commit, we need to merge both in some - // manner. - // Mercurial doesn't actually guarantee which of the paths would - // actually be checked out when checking out such manifests, - // but we always choose the single slash path. Most of the time, - // though, both will have the same contents. At least for files. - // Sub-directories may differ in what paths they contain, but - // again, the files they contain are usually identical. 
- if (entry_path.len == 0) { - if (!S_ISDIR(mode)) - goto corrupted; - if (merge_tree_id) - continue; - recurse_create_git_tree( - tree_id, reference, &entry->oid, result, cache); - goto cleanup; - } else if (S_ISDIR(mode)) { - struct name_entry *ref_entry; - ref_entry = lazy_tree_entry_by_name( - &ref_state, reference, entry_path.buf); - recurse_create_git_tree( - &entry->oid, - ref_entry ? &ref_entry->oid : NULL, - (entries.entry_b && S_ISDIR(entries.entry_b->mode)) - ? &entries.entry_b->oid : NULL, - &oid, cache); - } else { - const struct object_id *file_oid; - struct hg_object_id hg_oid; - oidcpy2hg(&hg_oid, &entry->oid); - if (is_empty_hg_file(&hg_oid)) - file_oid = ensure_empty_blob(); - else - file_oid = resolve_hg2git(&hg_oid); - if (!file_oid) - goto corrupted; - oidcpy(&oid, file_oid); - mode &= 0777; - if (!mode) - mode = S_IFLNK; - else - mode = S_IFREG | mode; - } - strbuf_addf(&tree_buf, "%o ", canon_mode(mode)); - strbuf_addslice(&tree_buf, entry_path); - strbuf_addch(&tree_buf, '\0'); - strbuf_add(&tree_buf, oid.hash, 20); - } - - if (!merge_tree_id) { - cache_entry = xmalloc(sizeof(k)); - cache_entry->ent = k.ent; - cache_entry->old_oid = k.old_oid; - } - store_git_tree(strbuf_as_slice(&tree_buf), reference, - cache_entry ? 
&cache_entry->new_oid : result); - strbuf_release(&tree_buf); - if (!merge_tree_id) { - hashmap_add(cache, &cache_entry->ent); - } - -cleanup: - if (state.state_a.tree) - free_tree_buffer(state.state_a.tree); - if (state.state_b.tree) - free_tree_buffer(state.state_b.tree); - if (ref_state.tree) - free_tree_buffer(ref_state.tree); - } - if (result && cache_entry) - oidcpy(result, &cache_entry->new_oid); - return; - -corrupted: - die("Corrupt mercurial metadata"); -} - -static struct hashmap git_tree_cache; - -void init_git_tree_cache(void) -{ - hashmap_init(&git_tree_cache, oid_map_entry_cmp, NULL, 0); -} - -void free_git_tree_cache(void) -{ - hashmap_clear_and_free(&git_tree_cache, struct oid_map_entry, ent); -} - -void create_git_tree(const struct object_id *tree_id, - const struct object_id *ref_tree, - struct object_id *result) -{ - recurse_create_git_tree(tree_id, ref_tree, NULL, result, &git_tree_cache); -} - void init_replace_map(void) { the_repository->objects->replace_map = diff --git a/src/cinnabar/manifest.rs b/src/cinnabar/manifest.rs index 75e8b6b6f..36e19ce8e 100644 --- a/src/cinnabar/manifest.rs +++ b/src/cinnabar/manifest.rs @@ -43,6 +43,8 @@ thread_local! 
{ } impl GitManifestTree { + pub const EMPTY: GitManifestTree = GitManifestTree(RawTree::EMPTY); + pub fn read(oid: GitManifestTreeId) -> Option { MANIFEST_TREE_CACHE.with(|cache| { let (lru_cache, queries, misses) = &mut *cache.borrow_mut(); diff --git a/src/main.rs b/src/main.rs index 4dbdd0c5e..85703feff 100644 --- a/src/main.rs +++ b/src/main.rs @@ -179,8 +179,6 @@ extern "C" { fn init_cinnabar(argv0: *const c_char); - fn init_git_tree_cache(); - fn free_git_tree_cache(); fn reset_replace_map(); static nongit: c_int; } @@ -191,14 +189,12 @@ unsafe fn init_cinnabar_2() -> bool { } let c = get_oid_committish(METADATA_REF.as_bytes()); init_metadata(c); - init_git_tree_cache(); true } pub unsafe fn do_reload(metadata: Option) { let mut c = None; done_cinnabar(); - init_git_tree_cache(); reset_replace_map(); if let Some(metadata) = metadata { @@ -214,7 +210,6 @@ pub unsafe fn do_reload(metadata: Option) { #[no_mangle] pub unsafe extern "C" fn done_cinnabar() { done_metadata(); - free_git_tree_cache(); } static REF_UPDATES: Lazy, CommitId>>> = diff --git a/src/store.rs b/src/store.rs index b1379db8e..37acf270b 100644 --- a/src/store.rs +++ b/src/store.rs @@ -6,12 +6,14 @@ use std::borrow::Cow; use std::cell::{Cell, OnceCell}; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; use std::ffi::OsStr; +use std::hash::Hash; use std::io::{copy, BufRead, BufReader, Read, Write}; use std::iter::{repeat, IntoIterator}; use std::mem; use std::num::NonZeroU32; use std::os::raw::c_int; use std::process::{Command, Stdio}; +use std::ptr; use std::rc::Rc; use std::sync::Mutex; @@ -35,7 +37,7 @@ use crate::cinnabar::{ }; use crate::git::{BlobId, CommitId, GitObjectId, GitOid, RecursedTreeEntry, TreeId, TreeIsh}; use crate::graft::{graft, grafted, replace_map_tablesize, GraftError}; -use crate::hg::{HgChangesetId, HgFileId, HgManifestId, HgObjectId}; +use crate::hg::{HgChangesetId, HgFileAttr, HgFileId, HgManifestId, HgObjectId}; use crate::hg_bundle::{ 
read_rev_chunk, rev_chunk, BundlePartInfo, BundleSpec, BundleWriter, RevChunkIter, }; @@ -44,11 +46,11 @@ use crate::hg_data::{hash_data, GitAuthorship, HgAuthorship, HgCommitter}; use crate::libcinnabar::{git_notes_tree, hg_notes_tree, strslice, strslice_mut}; use crate::libgit::{ die, for_each_ref_in, get_oid_blob, object_entry, object_id, object_type, strbuf, Commit, - RawBlob, RawCommit, RawTree, RefTransaction, + FileMode, RawBlob, RawCommit, RawTree, RefTransaction, }; use crate::oid::ObjectId; use crate::progress::{progress_enabled, Progress}; -use crate::tree_util::{diff_by_path, Empty, ParseTree, RecurseTree, WithPath}; +use crate::tree_util::{diff_by_path, merge_join_by_path, Empty, ParseTree, RecurseTree, WithPath}; use crate::util::{ FromBytes, ImmutBString, OsStrExt, RcExt, ReadExt, SliceExt, ToBoxed, Transpose, }; @@ -84,6 +86,7 @@ pub struct Metadata { pub flags: MetadataFlags, changeset_heads_: OnceCell, manifest_heads_: OnceCell, + tree_cache_: BTreeMap, } pub static mut METADATA: Metadata = Metadata::default(); @@ -103,6 +106,7 @@ impl Metadata { flags: MetadataFlags::empty(), changeset_heads_: OnceCell::new(), manifest_heads_: OnceCell::new(), + tree_cache_: BTreeMap::new(), } } } @@ -1216,11 +1220,6 @@ extern "C" { reference_entry: *const object_entry, ); pub fn do_set_replace(replaced: *const object_id, replace_with: *const object_id); - fn create_git_tree( - tree_id: *const object_id, - ref_tree: *const object_id, - result: *mut object_id, - ); fn get_object_entry(oid: *const object_id) -> *const object_entry; } @@ -1290,6 +1289,140 @@ impl Metadata { } } +fn corrupted_metata() -> ! { + die!("Corrupt mercurial metadata"); +} + +// The git storage for a mercurial manifest used to be a commit with two +// directories at its root: +// - a git directory, matching the git tree in the git commit corresponding to +// the mercurial changeset using the manifest. 
+// - a hg directory, containing the same file paths, but where all pointed +// objects are commits (mode 160000 in the git tree) whose sha1 is actually +// the mercurial sha1 for the corresponding mercurial file. +// Reconstructing the mercurial manifest required file paths, mercurial sha1 +// for each file, and the corresponding attribute ("l" for symlinks, "x" for +// executables). The hg directory alone was not enough for that, because it +// lacked the attribute information. +fn create_git_tree( + metadata: &mut Metadata, + manifest_tree_id: GitManifestTreeId, + ref_tree: Option, + merge_tree_id: Option, +) -> TreeId { + let cached = merge_tree_id + .is_none() + .then(|| metadata.tree_cache_.get(&manifest_tree_id)) + .flatten(); + if let Some(cached) = cached { + return *cached; + } + let manifest_tree = GitManifestTree::read(manifest_tree_id).unwrap(); + let merge_tree = merge_tree_id.map_or(GitManifestTree::EMPTY, |tid| { + GitManifestTree::read(tid).unwrap() + }); + let mut tree_buf = Vec::new(); + for (path, entries) in + merge_join_by_path(manifest_tree.iter(), merge_tree.iter()).map(WithPath::unzip) + { + let entry = entries + .as_ref() + .left() + .or_else(|| entries.as_ref().right()) + .unwrap(); + // In some edge cases, presumably all related to the use of + // `hg convert` before Mercurial 2.0.1, manifest trees have + // double slashes, which end up as "_" directories in the + // corresponding git cinnabar metadata. + // With further changes in the subsequent Mercurial manifests, + // those entries with double slashes are superseded with entries + // with single slash, while still being there. So to create + // the corresponding git commit, we need to merge both in some + // manner. + // Mercurial doesn't actually guarantee which of the paths would + // actually be checked out when checking out such manifests, + // but we always choose the single slash path. Most of the time, + // though, both will have the same contents. At least for files. 
+ // Sub-directories may differ in what paths they contain, but + // again, the files they contain are usually identical. + if path.len() == 0 { + if entry.is_right() { + corrupted_metata(); + } + if merge_tree_id.is_some() { + continue; + } + let result = + create_git_tree(metadata, manifest_tree_id, ref_tree, entry.clone().left()); + metadata.tree_cache_.insert(manifest_tree_id, result); + return result; + } + let (oid, mode): (GitObjectId, _) = match entry { + Either::Left(subtree_id) => { + let ref_entry_oid = ref_tree + .and_then(|tid| { + RawTree::read(tid) + .unwrap() + .iter() + .find(|e| e.path() == path.as_bstr()) + }) + .and_then(|e| e.into_inner().left()); + ( + create_git_tree( + metadata, + *subtree_id, + ref_entry_oid, + entries.right().and_then(Either::left), + ) + .into(), + FileMode::DIRECTORY, + ) + } + Either::Right(entry) => { + let oid = if entry.fid == RawHgFile::EMPTY_OID { + let mut empty_blob_id = object_id::default(); + unsafe { + store_git_blob([].into(), &mut empty_blob_id); + } + let empty_blob_id = BlobId::from_unchecked(empty_blob_id.into()); + assert_eq!(empty_blob_id, RawBlob::EMPTY_OID); + RawBlob::EMPTY_OID + } else if let Some(bid) = metadata.hg2git_mut().get_note(entry.fid.into()) { + BlobId::from_unchecked(bid) + } else { + corrupted_metata(); + }; + ( + oid.into(), + match entry.attr { + HgFileAttr::Regular => FileMode::REGULAR | FileMode::RW, + HgFileAttr::Executable => FileMode::REGULAR | FileMode::RWX, + HgFileAttr::Symlink => FileMode::SYMLINK, + }, + ) + } + }; + write!(tree_buf, "{:o} ", u16::from(mode)).ok(); + tree_buf.extend_from_slice(&path); + tree_buf.extend_from_slice(b"\0"); + tree_buf.extend_from_slice(oid.as_raw_bytes()); + } + let mut result = object_id::default(); + let ref_tree = ref_tree.map(GitObjectId::from).map(object_id::from); + unsafe { + store_git_tree( + tree_buf.as_slice().into(), + ref_tree.as_ref().map_or(ptr::null(), |x| x as *const _), + &mut result, + ); + } + let result = 
TreeId::from_unchecked(result.into()); + if merge_tree_id.is_none() { + metadata.tree_cache_.insert(manifest_tree_id, result); + } + result +} + fn store_changeset( metadata: &mut Metadata, changeset_id: HgChangesetId, @@ -1303,7 +1436,7 @@ fn store_changeset( .collect::>>() .ok_or(GraftError::NoGraft)?; let changeset = raw_changeset.parse().unwrap(); - let manifest_tree_id = match changeset.manifest() { + let manifest_tree_id = GitManifestTreeId::from_unchecked(match changeset.manifest() { m if m.is_null() => unsafe { let mut tid = object_id::default(); store_git_tree([].into(), std::ptr::null(), &mut tid); @@ -1315,25 +1448,15 @@ fn store_changeset( let manifest_commit = manifest_commit.parse().unwrap(); manifest_commit.tree() } - }; + }); let ref_tree = git_parents.get(0).map(|&p| { let ref_commit = RawCommit::read(p.into()).unwrap(); let ref_commit = ref_commit.parse().unwrap(); - object_id::from(ref_commit.tree()) + ref_commit.tree() }); - let mut tree_id = object_id::default(); - unsafe { - create_git_tree( - &object_id::from(manifest_tree_id), - ref_tree - .as_ref() - .map_or(std::ptr::null(), |t| t as *const _), - &mut tree_id, - ); - } - let tree_id = TreeId::from_unchecked(GitObjectId::from(tree_id)); + let tree_id = create_git_tree(metadata, manifest_tree_id, ref_tree, None); let (commit_id, metadata_id, transition) = match graft(changeset_id, raw_changeset, tree_id, &git_parents) {