Skip to content

Commit

Permalink
Merge branch 'ps/reftable-reflog-iteration-perf'
Browse files Browse the repository at this point in the history
The code to iterate over reflogs in the reftable has been optimized
to reduce memory allocation and deallocation.

Reviewed-by: Josh Steadmon <[email protected]>
cf. <[email protected]>

* ps/reftable-reflog-iteration-perf:
  refs/reftable: track last log record name via strbuf
  reftable/record: use scratch buffer when decoding records
  reftable/record: reuse message when decoding log records
  reftable/record: reuse refnames when decoding log records
  reftable/record: avoid copying author info
  reftable/record: convert old and new object IDs to arrays
  refs/reftable: reload correct stack when creating reflog iter
  • Loading branch information
gitster committed Mar 21, 2024
2 parents dc97afd + fcacc2b commit e8c1cda
Show file tree
Hide file tree
Showing 10 changed files with 154 additions and 211 deletions.
52 changes: 16 additions & 36 deletions refs/reftable-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,23 +171,6 @@ static int should_write_log(struct ref_store *refs, const char *refname)
}
}

static void clear_reftable_log_record(struct reftable_log_record *log)
{
switch (log->value_type) {
case REFTABLE_LOG_UPDATE:
/*
* When we write log records, the hashes are owned by the
* caller and thus shouldn't be free'd.
*/
log->value.update.old_hash = NULL;
log->value.update.new_hash = NULL;
break;
case REFTABLE_LOG_DELETION:
break;
}
reftable_log_record_release(log);
}

static void fill_reftable_log_record(struct reftable_log_record *log)
{
const char *info = git_committer_info(0);
Expand Down Expand Up @@ -1106,8 +1089,8 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
fill_reftable_log_record(log);
log->update_index = ts;
log->refname = xstrdup(u->refname);
log->value.update.new_hash = u->new_oid.hash;
log->value.update.old_hash = tx_update->current_oid.hash;
memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ);
memcpy(log->value.update.old_hash, tx_update->current_oid.hash, GIT_MAX_RAWSZ);
log->value.update.message =
xstrndup(u->msg, arg->refs->write_options.block_size / 2);
}
Expand Down Expand Up @@ -1162,7 +1145,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
done:
assert(ret != REFTABLE_API_ERROR);
for (i = 0; i < logs_nr; i++)
clear_reftable_log_record(&logs[i]);
reftable_log_record_release(&logs[i]);
free(logs);
return ret;
}
Expand Down Expand Up @@ -1279,13 +1262,13 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
log.update_index = ts;
log.value.update.message = xstrndup(create->logmsg,
create->refs->write_options.block_size / 2);
log.value.update.new_hash = new_oid.hash;
memcpy(log.value.update.new_hash, new_oid.hash, GIT_MAX_RAWSZ);
if (refs_resolve_ref_unsafe(&create->refs->base, create->refname,
RESOLVE_REF_READING, &old_oid, NULL))
log.value.update.old_hash = old_oid.hash;
memcpy(log.value.update.old_hash, old_oid.hash, GIT_MAX_RAWSZ);

ret = reftable_writer_add_log(writer, &log);
clear_reftable_log_record(&log);
reftable_log_record_release(&log);
return ret;
}

Expand Down Expand Up @@ -1424,7 +1407,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
logs[logs_nr].update_index = deletion_ts;
logs[logs_nr].value.update.message =
xstrndup(arg->logmsg, arg->refs->write_options.block_size / 2);
logs[logs_nr].value.update.old_hash = old_ref.value.val1;
memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
logs_nr++;

ret = read_ref_without_reload(arg->stack, "HEAD", &head_oid, &head_referent, &head_type);
Expand Down Expand Up @@ -1456,7 +1439,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
logs[logs_nr].update_index = creation_ts;
logs[logs_nr].value.update.message =
xstrndup(arg->logmsg, arg->refs->write_options.block_size / 2);
logs[logs_nr].value.update.new_hash = old_ref.value.val1;
memcpy(logs[logs_nr].value.update.new_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
logs_nr++;

/*
Expand Down Expand Up @@ -1519,10 +1502,6 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
for (i = 0; i < logs_nr; i++) {
if (!strcmp(logs[i].refname, "HEAD"))
continue;
if (logs[i].value.update.old_hash == old_ref.value.val1)
logs[i].value.update.old_hash = NULL;
if (logs[i].value.update.new_hash == old_ref.value.val1)
logs[i].value.update.new_hash = NULL;
logs[i].refname = NULL;
reftable_log_record_release(&logs[i]);
}
Expand Down Expand Up @@ -1600,7 +1579,7 @@ struct reftable_reflog_iterator {
struct reftable_ref_store *refs;
struct reftable_iterator iter;
struct reftable_log_record log;
char *last_name;
struct strbuf last_name;
int err;
};

Expand All @@ -1619,15 +1598,15 @@ static int reftable_reflog_iterator_advance(struct ref_iterator *ref_iterator)
* we've already produced this name. This could be faster by
* seeking directly to reflog@update_index==0.
*/
if (iter->last_name && !strcmp(iter->log.refname, iter->last_name))
if (!strcmp(iter->log.refname, iter->last_name.buf))
continue;

if (check_refname_format(iter->log.refname,
REFNAME_ALLOW_ONELEVEL))
continue;

free(iter->last_name);
iter->last_name = xstrdup(iter->log.refname);
strbuf_reset(&iter->last_name);
strbuf_addstr(&iter->last_name, iter->log.refname);
iter->base.refname = iter->log.refname;

break;
Expand Down Expand Up @@ -1660,7 +1639,7 @@ static int reftable_reflog_iterator_abort(struct ref_iterator *ref_iterator)
(struct reftable_reflog_iterator *)ref_iterator;
reftable_log_record_release(&iter->log);
reftable_iterator_destroy(&iter->iter);
free(iter->last_name);
strbuf_release(&iter->last_name);
free(iter);
return ITER_DONE;
}
Expand All @@ -1680,13 +1659,14 @@ static struct reftable_reflog_iterator *reflog_iterator_for_stack(struct reftabl

iter = xcalloc(1, sizeof(*iter));
base_ref_iterator_init(&iter->base, &reftable_reflog_iterator_vtable);
strbuf_init(&iter->last_name, 0);
iter->refs = refs;

ret = refs->err;
if (ret)
goto done;

ret = reftable_stack_reload(refs->main_stack);
ret = reftable_stack_reload(stack);
if (ret < 0)
goto done;

Expand Down Expand Up @@ -2184,7 +2164,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
dest->value_type = REFTABLE_LOG_DELETION;
} else {
if ((flags & EXPIRE_REFLOGS_REWRITE) && last_hash)
dest->value.update.old_hash = last_hash;
memcpy(dest->value.update.old_hash, last_hash, GIT_MAX_RAWSZ);
last_hash = logs[i].value.update.new_hash;
}
}
Expand Down
4 changes: 3 additions & 1 deletion reftable/block.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,8 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
return REFTABLE_FORMAT_ERROR;

string_view_consume(&in, n);
n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size);
n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size,
&it->scratch);
if (n < 0)
return -1;
string_view_consume(&in, n);
Expand Down Expand Up @@ -369,6 +370,7 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want)
void block_iter_close(struct block_iter *it)
{
strbuf_release(&it->last_key);
strbuf_release(&it->scratch);
}

int block_reader_seek(struct block_reader *br, struct block_iter *it,
Expand Down
2 changes: 2 additions & 0 deletions reftable/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,12 @@ struct block_iter {

/* key for last entry we read. */
struct strbuf last_key;
struct strbuf scratch;
};

#define BLOCK_ITER_INIT { \
.last_key = STRBUF_INIT, \
.scratch = STRBUF_INIT, \
}

/* initializes a block reader. */
Expand Down
11 changes: 4 additions & 7 deletions reftable/merged_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,16 +289,13 @@ merged_table_from_log_records(struct reftable_log_record **logs,

static void test_merged_logs(void)
{
uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 };
uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 };
uint8_t hash3[GIT_SHA1_RAWSZ] = { 3 };
struct reftable_log_record r1[] = {
{
.refname = "a",
.update_index = 2,
.value_type = REFTABLE_LOG_UPDATE,
.value.update = {
.old_hash = hash2,
.old_hash = { 2 },
/* deletion */
.name = "jane doe",
.email = "jane@invalid",
Expand All @@ -310,8 +307,8 @@ static void test_merged_logs(void)
.update_index = 1,
.value_type = REFTABLE_LOG_UPDATE,
.value.update = {
.old_hash = hash1,
.new_hash = hash2,
.old_hash = { 1 },
.new_hash = { 2 },
.name = "jane doe",
.email = "jane@invalid",
.message = "message1",
Expand All @@ -324,7 +321,7 @@ static void test_merged_logs(void)
.update_index = 3,
.value_type = REFTABLE_LOG_UPDATE,
.value.update = {
.new_hash = hash3,
.new_hash = { 3 },
.name = "jane doe",
.email = "jane@invalid",
.message = "message3",
Expand Down
62 changes: 25 additions & 37 deletions reftable/readwrite_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,15 @@ static void write_table(char ***names, struct strbuf *buf, int N,
}

for (i = 0; i < N; i++) {
uint8_t hash[GIT_SHA256_RAWSZ] = { 0 };
char name[100];
int n;

set_test_hash(hash, i);

snprintf(name, sizeof(name), "refs/heads/branch%02d", i);

log.refname = name;
log.update_index = update_index;
log.value_type = REFTABLE_LOG_UPDATE;
log.value.update.new_hash = hash;
set_test_hash(log.value.update.new_hash, i);
log.value.update.message = "message";

n = reftable_writer_add_log(w, &log);
Expand Down Expand Up @@ -137,13 +134,10 @@ static void test_log_buffer_size(void)
/* This tests buffer extension for log compression. Must use a random
hash, to ensure that the compressed part is larger than the original.
*/
uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ];
for (i = 0; i < GIT_SHA1_RAWSZ; i++) {
hash1[i] = (uint8_t)(git_rand() % 256);
hash2[i] = (uint8_t)(git_rand() % 256);
log.value.update.old_hash[i] = (uint8_t)(git_rand() % 256);
log.value.update.new_hash[i] = (uint8_t)(git_rand() % 256);
}
log.value.update.old_hash = hash1;
log.value.update.new_hash = hash2;
reftable_writer_set_limits(w, update_index, update_index);
err = reftable_writer_add_log(w, &log);
EXPECT_ERR(err);
Expand All @@ -161,25 +155,26 @@ static void test_log_overflow(void)
.block_size = ARRAY_SIZE(msg),
};
int err;
struct reftable_log_record
log = { .refname = "refs/heads/master",
.update_index = 0xa,
.value_type = REFTABLE_LOG_UPDATE,
.value = { .update = {
.name = "Han-Wen Nienhuys",
.email = "[email protected]",
.tz_offset = 100,
.time = 0x5e430672,
.message = msg,
} } };
struct reftable_log_record log = {
.refname = "refs/heads/master",
.update_index = 0xa,
.value_type = REFTABLE_LOG_UPDATE,
.value = {
.update = {
.old_hash = { 1 },
.new_hash = { 2 },
.name = "Han-Wen Nienhuys",
.email = "[email protected]",
.tz_offset = 100,
.time = 0x5e430672,
.message = msg,
},
},
};
struct reftable_writer *w =
reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts);

uint8_t hash1[GIT_SHA1_RAWSZ] = {1}, hash2[GIT_SHA1_RAWSZ] = { 2 };

memset(msg, 'x', sizeof(msg) - 1);
log.value.update.old_hash = hash1;
log.value.update.new_hash = hash2;
reftable_writer_set_limits(w, update_index, update_index);
err = reftable_writer_add_log(w, &log);
EXPECT(err == REFTABLE_ENTRY_TOO_BIG_ERROR);
Expand Down Expand Up @@ -219,16 +214,13 @@ static void test_log_write_read(void)
EXPECT_ERR(err);
}
for (i = 0; i < N; i++) {
uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ];
struct reftable_log_record log = { NULL };
set_test_hash(hash1, i);
set_test_hash(hash2, i + 1);

log.refname = names[i];
log.update_index = i;
log.value_type = REFTABLE_LOG_UPDATE;
log.value.update.old_hash = hash1;
log.value.update.new_hash = hash2;
set_test_hash(log.value.update.old_hash, i);
set_test_hash(log.value.update.new_hash, i + 1);

err = reftable_writer_add_log(w, &log);
EXPECT_ERR(err);
Expand Down Expand Up @@ -298,18 +290,15 @@ static void test_log_zlib_corruption(void)
struct reftable_writer *w =
reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts);
const struct reftable_stats *stats = NULL;
uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 };
uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 };
char message[100] = { 0 };
int err, i, n;

struct reftable_log_record log = {
.refname = "refname",
.value_type = REFTABLE_LOG_UPDATE,
.value = {
.update = {
.new_hash = hash1,
.old_hash = hash2,
.new_hash = { 1 },
.old_hash = { 2 },
.name = "My Name",
.email = "myname@invalid",
.message = message,
Expand Down Expand Up @@ -821,13 +810,12 @@ static void test_write_multiple_indices(void)
}

for (i = 0; i < 100; i++) {
unsigned char hash[GIT_SHA1_RAWSZ] = {i};
struct reftable_log_record log = {
.update_index = 1,
.value_type = REFTABLE_LOG_UPDATE,
.value.update = {
.old_hash = hash,
.new_hash = hash,
.old_hash = { i },
.new_hash = { i },
},
};

Expand Down
Loading

0 comments on commit e8c1cda

Please sign in to comment.