Skip to content

Commit

Permalink
Move create_git_tree to Rust
Browse files Browse the repository at this point in the history
  • Loading branch information
glandium committed Nov 12, 2023
1 parent 563c162 commit 7ccce28
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 324 deletions.
297 changes: 0 additions & 297 deletions src/cinnabar-helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,303 +155,6 @@ const struct object_id *repo_lookup_replace_object(
return lookup_replace_object(r, oid);
}

/* The git storage for a mercurial manifest used to be a commit with two
* directories at its root:
* - a git directory, matching the git tree in the git commit corresponding to
* the mercurial changeset using the manifest.
* - a hg directory, containing the same file paths, but where all pointed
* objects are commits (mode 160000 in the git tree) whose sha1 is actually
* the mercurial sha1 for the corresponding mercurial file.
* Reconstructing the mercurial manifest required file paths, mercurial sha1
* for each file, and the corresponding attribute ("l" for symlinks, "x" for
* executables). The hg directory alone was not enough for that, because it
* lacked the attribute information.
*/
/*
 * Optionally record `tree` in a caller-provided object list.
 *
 * When `tree_list` is NULL nothing happens. Otherwise the tree's object is
 * passed to object_list_insert() and its SEEN flag is set.
 */
static void track_tree(struct tree *tree, struct object_list **tree_list)
{
	if (tree_list == NULL)
		return;

	object_list_insert(&tree->object, tree_list);
	tree->object.flags |= SEEN;
}

/* Walk state over a single tree: the parsed tree object together with the
 * descriptor iterating over that tree's buffer (both are set up by
 * manifest_tree_state_init). */
struct manifest_tree_state {
/* Parsed tree; NULL while the state has not been initialized. */
struct tree *tree;
/* Cursor over tree->buffer. */
struct tree_desc desc;
};

static int manifest_tree_state_init(const struct object_id *tree_id,
struct manifest_tree_state *result,
struct object_list **tree_list)
{
result->tree = parse_tree_indirect(tree_id);
if (!result->tree)
return -1;
track_tree(result->tree, tree_list);

init_tree_desc(&result->desc, result->tree->buffer,
result->tree->size);
return 0;
}

/* State for walking two trees ("a" and "b") in lockstep, as driven by
 * merge_tree_entry(). */
struct merge_manifest_tree_state {
/* Per-side tree walkers. */
struct manifest_tree_state state_a, state_b;
/* Entry most recently read from each side. */
struct name_entry entry_a, entry_b;
/* Path of each side's current entry; an empty slice marks an exhausted
 * (or absent) side. */
struct strslice entry_a_path, entry_b_path;
/* Ordering of the current entries: < 0 when only side a is current,
 * > 0 when only side b is, 0 when both are. It also decides which side(s)
 * merge_tree_entry() reads from on its next call. */
int cmp;
};

/* One step of a two-tree walk, as filled in by merge_tree_entry(). */
struct merge_name_entry {
/* Entry from each side at this step; NULL when that side has no entry
 * for the reported path. */
struct name_entry *entry_a, *entry_b;
/* Path of the reported entry (taken from side b when both sides match). */
struct strslice path;
};

/*
 * Set up a two-tree walk over `tree_id_a` and `tree_id_b`.
 *
 * Either id may be NULL, in which case that side is treated as empty and
 * `cmp` is primed so that merge_tree_entry() only ever reads the other side.
 *
 * Returns 0 on success, the (non-zero) status of manifest_tree_state_init()
 * when a tree cannot be parsed, and 1 when both ids are NULL.
 */
static int merge_manifest_tree_state_init(const struct object_id *tree_id_a,
                                          const struct object_id *tree_id_b,
                                          struct merge_manifest_tree_state *result,
                                          struct object_list **tree_list)
{
	int status;

	memset(result, 0, sizeof(*result));
	result->cmp = 0;

	if (!tree_id_a) {
		/* No side a: mark it exhausted and only advance side b. */
		result->entry_a_path = empty_strslice();
		result->cmp = 1;
	} else {
		status = manifest_tree_state_init(tree_id_a, &result->state_a, tree_list);
		if (status)
			return status;
	}

	if (tree_id_b)
		return manifest_tree_state_init(tree_id_b, &result->state_b, tree_list);

	/* Neither tree was given. */
	if (result->cmp != 0)
		return 1;

	/* No side b: mark it exhausted and only advance side a. */
	result->entry_b_path = empty_strslice();
	result->cmp = -1;
	return 0;
}

/*
 * Advance a two-tree walk by one step and describe it in `entries`.
 *
 * A side is read from only when the previous step reported its entry
 * (`cmp <= 0` for side a, `cmp >= 0` for side b). The two current entries are
 * then ordered with base_name_compare(), and the smaller one (or both, when
 * they compare equal) is reported.
 *
 * Returns 1 when `entries` was filled in, 0 when both sides are exhausted
 * (`entries` is left untouched in that case).
 */
static int merge_tree_entry(struct merge_manifest_tree_state *state,
                            struct merge_name_entry *entries)
{
	int a_exhausted, b_exhausted;

	if (state->cmp <= 0)
		state->entry_a_path =
			tree_entry(&state->state_a.desc, &state->entry_a)
				? strslice_from_str(state->entry_a.path)
				: empty_strslice();
	if (state->cmp >= 0)
		state->entry_b_path =
			tree_entry(&state->state_b.desc, &state->entry_b)
				? strslice_from_str(state->entry_b.path)
				: empty_strslice();

	/* An empty path marks a side with nothing left to offer. */
	a_exhausted = !state->entry_a_path.len;
	b_exhausted = !state->entry_b_path.len;

	if (a_exhausted && b_exhausted)
		return 0;

	if (a_exhausted)
		state->cmp = 1;
	else if (b_exhausted)
		state->cmp = -1;
	else
		state->cmp = base_name_compare(
			state->entry_a_path.buf, state->entry_a_path.len,
			state->entry_a.mode,
			state->entry_b_path.buf, state->entry_b_path.len,
			state->entry_b.mode);

	entries->entry_a = (state->cmp <= 0) ? &state->entry_a : NULL;
	entries->entry_b = (state->cmp >= 0) ? &state->entry_b : NULL;
	/* When both sides match, side b's path is the one reported. */
	entries->path = (state->cmp >= 0) ? state->entry_b_path
	                                  : state->entry_a_path;
	return 1;
}

/*
 * Find the entry named `path` in the tree `tree_id`, using `state` as a
 * forward-only cursor.
 *
 * `state` is initialized on first use. The cursor only advances past entries
 * whose name sorts (per strcmp) before `path`, so successive calls are
 * expected to ask for names in increasing order.
 *
 * Returns a pointer into `state` for the matching entry, or NULL when
 * `tree_id` is NULL, the tree cannot be parsed, or no such entry exists.
 */
static struct name_entry *
lazy_tree_entry_by_name(struct manifest_tree_state *state,
                        const struct object_id *tree_id,
                        const char *path)
{
	if (tree_id == NULL)
		return NULL;

	if (state->tree == NULL &&
	    manifest_tree_state_init(tree_id, state, NULL))
		return NULL;

	while (state->desc.size) {
		int order = strcmp(state->desc.entry.path, path);

		if (order == 0)
			return &state->desc.entry;
		if (order > 0)
			break;
		update_tree_entry(&state->desc);
	}

	return NULL;
}

/* Hashmap entry associating one object id with another; entries are
 * compared on old_oid (see oid_map_entry_cmp). */
struct oid_map_entry {
/* Embedded hashmap bookkeeping. */
struct hashmap_entry ent;
/* Lookup key. */
struct object_id old_oid;
/* Value associated with old_oid. */
struct object_id new_oid;
};

/*
 * Hashmap comparison callback for struct oid_map_entry.
 *
 * Two entries are compared on their `old_oid` only; `cmpdata` and `keydata`
 * are unused. Returns the result of oidcmp() on the two keys.
 */
static int oid_map_entry_cmp(const void *cmpdata, const struct hashmap_entry *e1,
                             const struct hashmap_entry *e2, const void *keydata)
{
	const struct oid_map_entry *lhs =
		container_of(e1, const struct oid_map_entry, ent);
	const struct oid_map_entry *rhs =
		container_of(e2, const struct oid_map_entry, ent);

	return oidcmp(&lhs->old_oid, &rhs->old_oid);
}

/*
 * Create the git tree corresponding to the manifest tree `tree_id` and write
 * its id to `result`.
 *
 * Every entry name in a manifest tree is expected to start with "_"; the
 * remainder is the name used in the generated git tree. Directories are
 * handled recursively, and file entries have their mercurial id resolved to
 * a git blob id.
 *
 * tree_id:       manifest tree to convert.
 * reference:     passed through to store_git_tree(); its sub-trees are looked
 *                up by name and handed down as the reference for the matching
 *                sub-directory. May be NULL.
 * merge_tree_id: optional second manifest tree whose entries are merged with
 *                those of `tree_id` (on conflict the `tree_id` side wins).
 *                Merged results bypass the cache.
 * result:        receives the id of the created git tree. May be NULL only
 *                when `merge_tree_id` is NULL.
 * cache:         map from manifest tree id to generated git tree id,
 *                consulted and filled in when `merge_tree_id` is NULL.
 *
 * Dies with "Corrupt mercurial metadata" when the manifest tree cannot be
 * read or is malformed.
 */
static void recurse_create_git_tree(const struct object_id *tree_id,
                                    const struct object_id *reference,
                                    const struct object_id *merge_tree_id,
                                    struct object_id *result,
                                    struct hashmap *cache)
{
	struct oid_map_entry k, *cache_entry = NULL;

	if (!merge_tree_id) {
		hashmap_entry_init(&k.ent, oidhash(tree_id));
		oidcpy(&k.old_oid, tree_id);
		cache_entry = hashmap_get_entry(cache, &k, ent, NULL);
	}
	if (!cache_entry) {
		struct merge_manifest_tree_state state;
		struct manifest_tree_state ref_state = { NULL, };
		struct merge_name_entry entries;
		struct strbuf tree_buf = STRBUF_INIT;

		if (merge_manifest_tree_state_init(tree_id, merge_tree_id, &state, NULL))
			goto corrupted;

		while (merge_tree_entry(&state, &entries)) {
			struct object_id oid;
			struct name_entry *entry = entries.entry_a ? entries.entry_a : entries.entry_b;
			unsigned mode = entry->mode;
			struct strslice entry_path;
			struct strslice underscore = { 1, "_" };
			if (!strslice_startswith(entries.path, underscore))
				goto corrupted;
			entry_path = strslice_slice(entries.path, 1, SIZE_MAX);
			// In some edge cases, presumably all related to the use of
			// `hg convert` before Mercurial 2.0.1, manifest trees have
			// double slashes, which end up as "_" directories in the
			// corresponding git cinnabar metadata.
			// With further changes in the subsequent Mercurial manifests,
			// those entries with double slashes are superseded with entries
			// with single slash, while still being there. So to create
			// the corresponding git commit, we need to merge both in some
			// manner.
			// Mercurial doesn't actually guarantee which of the paths would
			// actually be checked out when checking out such manifests,
			// but we always choose the single slash path. Most of the time,
			// though, both will have the same contents. At least for files.
			// Sub-directories may differ in what paths they contain, but
			// again, the files they contain are usually identical.
			if (entry_path.len == 0) {
				if (!S_ISDIR(mode))
					goto corrupted;
				if (merge_tree_id)
					continue;
				// Start over, merging the "_" directory into this
				// tree. The recursive call fills in `result`; whatever
				// was accumulated in tree_buf so far is discarded (and
				// released under the cleanup label).
				recurse_create_git_tree(
					tree_id, reference, &entry->oid, result, cache);
				goto cleanup;
			} else if (S_ISDIR(mode)) {
				struct name_entry *ref_entry;
				ref_entry = lazy_tree_entry_by_name(
					&ref_state, reference, entry_path.buf);
				recurse_create_git_tree(
					&entry->oid,
					ref_entry ? &ref_entry->oid : NULL,
					(entries.entry_b && S_ISDIR(entries.entry_b->mode))
						? &entries.entry_b->oid : NULL,
					&oid, cache);
			} else {
				const struct object_id *file_oid;
				struct hg_object_id hg_oid;
				oidcpy2hg(&hg_oid, &entry->oid);
				if (is_empty_hg_file(&hg_oid))
					file_oid = ensure_empty_blob();
				else
					file_oid = resolve_hg2git(&hg_oid);
				if (!file_oid)
					goto corrupted;
				oidcpy(&oid, file_oid);
				// Only the permission bits are kept; an entry with
				// none of them set becomes a symlink, anything else a
				// regular file with those permissions.
				mode &= 0777;
				if (!mode)
					mode = S_IFLNK;
				else
					mode = S_IFREG | mode;
			}
			// Append one raw tree entry: "<octal mode> <name>\0<id>".
			strbuf_addf(&tree_buf, "%o ", canon_mode(mode));
			strbuf_addslice(&tree_buf, entry_path);
			strbuf_addch(&tree_buf, '\0');
			strbuf_add(&tree_buf, oid.hash, 20);
		}

		if (!merge_tree_id) {
			cache_entry = xmalloc(sizeof(k));
			cache_entry->ent = k.ent;
			cache_entry->old_oid = k.old_oid;
		}
		store_git_tree(strbuf_as_slice(&tree_buf), reference,
		               cache_entry ? &cache_entry->new_oid : result);
		if (!merge_tree_id) {
			hashmap_add(cache, &cache_entry->ent);
		}

cleanup:
		// Released here rather than right after store_git_tree() so that
		// the early exit taken for "_" directories does not leak entries
		// accumulated before it.
		strbuf_release(&tree_buf);
		if (state.state_a.tree)
			free_tree_buffer(state.state_a.tree);
		if (state.state_b.tree)
			free_tree_buffer(state.state_b.tree);
		if (ref_state.tree)
			free_tree_buffer(ref_state.tree);
	}
	if (result && cache_entry)
		oidcpy(result, &cache_entry->new_oid);
	return;

corrupted:
	die("Corrupt mercurial metadata");
}

/* Cache handed to recurse_create_git_tree() by create_git_tree(). */
static struct hashmap git_tree_cache;

/* Initialize git_tree_cache as an empty hashmap whose entries are compared
 * with oid_map_entry_cmp. */
void init_git_tree_cache(void)
{
hashmap_init(&git_tree_cache, oid_map_entry_cmp, NULL, 0);
}

/* Empty git_tree_cache, freeing every struct oid_map_entry it holds. */
void free_git_tree_cache(void)
{
hashmap_clear_and_free(&git_tree_cache, struct oid_map_entry, ent);
}

/* Public entry point: create the git tree for the manifest tree `tree_id`,
 * using `ref_tree` as the reference tree, and store its id in `result`.
 * Runs recurse_create_git_tree() with no merge tree and the file-level
 * git_tree_cache. */
void create_git_tree(const struct object_id *tree_id,
const struct object_id *ref_tree,
struct object_id *result)
{
recurse_create_git_tree(tree_id, ref_tree, NULL, result, &git_tree_cache);
}

void init_replace_map(void)
{
the_repository->objects->replace_map =
Expand Down
2 changes: 2 additions & 0 deletions src/cinnabar/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ thread_local! {
}

impl GitManifestTree {
pub const EMPTY: GitManifestTree = GitManifestTree(RawTree::EMPTY);

pub fn read(oid: GitManifestTreeId) -> Option<GitManifestTree> {
MANIFEST_TREE_CACHE.with(|cache| {
let (lru_cache, queries, misses) = &mut *cache.borrow_mut();
Expand Down
5 changes: 0 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,6 @@ extern "C" {

fn init_cinnabar(argv0: *const c_char);

fn init_git_tree_cache();
fn free_git_tree_cache();
fn reset_replace_map();
static nongit: c_int;
}
Expand All @@ -191,14 +189,12 @@ unsafe fn init_cinnabar_2() -> bool {
}
let c = get_oid_committish(METADATA_REF.as_bytes());
init_metadata(c);
init_git_tree_cache();
true
}

pub unsafe fn do_reload(metadata: Option<CommitId>) {
let mut c = None;
done_cinnabar();
init_git_tree_cache();

reset_replace_map();
if let Some(metadata) = metadata {
Expand All @@ -214,7 +210,6 @@ pub unsafe fn do_reload(metadata: Option<CommitId>) {
/// Tears down cinnabar state by calling `done_metadata` followed by
/// `free_git_tree_cache`.
///
/// Exported unmangled with the C ABI.
///
/// # Safety
///
/// NOTE(review): the preconditions are not visible from this function alone;
/// presumably the state released here must have been initialized first and
/// must not be in use concurrently. Confirm against the callees.
#[no_mangle]
pub unsafe extern "C" fn done_cinnabar() {
done_metadata();
free_git_tree_cache();
}

static REF_UPDATES: Lazy<Mutex<HashMap<Box<BStr>, CommitId>>> =
Expand Down
Loading

0 comments on commit 7ccce28

Please sign in to comment.