From eea0d11d6d7114c850e50fa3f247b2ecf9a330fc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Mar 2024 13:10:55 +0100 Subject: [PATCH 1/7] refs/reftable: reload correct stack when creating reflog iter When creating a new reflog iterator, we first have to reload the stack that the iterator is being created. This is done so that any concurrent writes to the stack are reflected. But `reflog_iterator_for_stack()` always reloads the main stack, which is wrong. Fix this and reload the correct stack. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 249a618b5a9cb9..f04be942ac394d 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1682,7 +1682,7 @@ static struct reftable_reflog_iterator *reflog_iterator_for_stack(struct reftabl if (ret) goto done; - ret = reftable_stack_reload(refs->main_stack); + ret = reftable_stack_reload(stack); if (ret < 0) goto done; From 87ff723018bfca588b5d68e110ab04494c451ebd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Mar 2024 13:10:59 +0100 Subject: [PATCH 2/7] reftable/record: convert old and new object IDs to arrays In 7af607c58d (reftable/record: store "val1" hashes as static arrays, 2024-01-03) and b31e3cc620 (reftable/record: store "val2" hashes as static arrays, 2024-01-03) we have converted ref records to store their object IDs in a static array. Convert log records to do the same so that their old and new object IDs are arrays, too. This change results in two allocations less per log record that we're iterating over. Before: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 8,068,495 allocs, 8,068,373 frees, 401,011,862 bytes allocated After: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 39 ++++++------------------ reftable/merged_test.c | 11 +++---- reftable/readwrite_test.c | 62 +++++++++++++++----------------------- reftable/record.c | 59 ++++++------------------------------ reftable/record_test.c | 11 ------- reftable/reftable-record.h | 4 +-- reftable/stack_test.c | 26 +++++++--------- 7 files changed, 61 insertions(+), 151 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index f04be942ac394d..2de57c047a25ab 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -171,23 +171,6 @@ static int should_write_log(struct ref_store *refs, const char *refname) } } -static void clear_reftable_log_record(struct reftable_log_record *log) -{ - switch (log->value_type) { - case REFTABLE_LOG_UPDATE: - /* - * When we write log records, the hashes are owned by the - * caller and thus shouldn't be free'd. - */ - log->value.update.old_hash = NULL; - log->value.update.new_hash = NULL; - break; - case REFTABLE_LOG_DELETION: - break; - } - reftable_log_record_release(log); -} - static void fill_reftable_log_record(struct reftable_log_record *log) { const char *info = git_committer_info(0); @@ -1102,8 +1085,8 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data fill_reftable_log_record(log); log->update_index = ts; log->refname = xstrdup(u->refname); - log->value.update.new_hash = u->new_oid.hash; - log->value.update.old_hash = tx_update->current_oid.hash; + memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ); + memcpy(log->value.update.old_hash, tx_update->current_oid.hash, GIT_MAX_RAWSZ); log->value.update.message = xstrndup(u->msg, arg->refs->write_options.block_size / 2); } @@ -1158,7 +1141,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data done: assert(ret != REFTABLE_API_ERROR); for (i = 0; i < logs_nr; i++) - clear_reftable_log_record(&logs[i]); + reftable_log_record_release(&logs[i]); free(logs); return ret; } @@ -1275,13 +1258,13 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da log.update_index = ts; log.value.update.message = xstrndup(create->logmsg, create->refs->write_options.block_size / 2); - log.value.update.new_hash = new_oid.hash; + memcpy(log.value.update.new_hash, new_oid.hash, GIT_MAX_RAWSZ); if (refs_resolve_ref_unsafe(&create->refs->base, create->refname, RESOLVE_REF_READING, &old_oid, NULL)) - log.value.update.old_hash = old_oid.hash; + memcpy(log.value.update.old_hash, old_oid.hash, GIT_MAX_RAWSZ); ret = reftable_writer_add_log(writer, &log); - clear_reftable_log_record(&log); + reftable_log_record_release(&log); return ret; } @@ -1420,7 +1403,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) logs[logs_nr].update_index = deletion_ts; logs[logs_nr].value.update.message = xstrndup(arg->logmsg, arg->refs->write_options.block_size / 2); - logs[logs_nr].value.update.old_hash = old_ref.value.val1; + memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ); logs_nr++; ret = read_ref_without_reload(arg->stack, "HEAD", &head_oid, &head_referent, &head_type); @@ -1452,7 +1435,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) logs[logs_nr].update_index = creation_ts; logs[logs_nr].value.update.message = xstrndup(arg->logmsg, arg->refs->write_options.block_size / 2); - logs[logs_nr].value.update.new_hash = old_ref.value.val1; + memcpy(logs[logs_nr].value.update.new_hash, old_ref.value.val1, GIT_MAX_RAWSZ); logs_nr++; /* @@ -1515,10 +1498,6 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) for (i = 0; i < logs_nr; i++) { if (!strcmp(logs[i].refname, "HEAD")) continue; - if (logs[i].value.update.old_hash == old_ref.value.val1) - logs[i].value.update.old_hash = NULL; - if (logs[i].value.update.new_hash == old_ref.value.val1) - logs[i].value.update.new_hash = NULL; logs[i].refname = NULL; reftable_log_record_release(&logs[i]); } @@ -2180,7 +2159,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, dest->value_type = REFTABLE_LOG_DELETION; } else { if ((flags & EXPIRE_REFLOGS_REWRITE) && last_hash) - dest->value.update.old_hash = last_hash; + memcpy(dest->value.update.old_hash, last_hash, GIT_MAX_RAWSZ); last_hash = logs[i].value.update.new_hash; } } diff --git a/reftable/merged_test.c b/reftable/merged_test.c index d0f77a3b8f51e0..530fc82d1c2458 100644 --- a/reftable/merged_test.c +++ b/reftable/merged_test.c @@ -289,16 +289,13 @@ merged_table_from_log_records(struct reftable_log_record **logs, static void test_merged_logs(void) { - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; - uint8_t hash3[GIT_SHA1_RAWSZ] = { 3 }; struct reftable_log_record r1[] = { { .refname = "a", .update_index = 2, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .old_hash = hash2, + .old_hash = { 2 }, /* deletion */ .name = "jane doe", .email = "jane@invalid", @@ -310,8 +307,8 @@ static void test_merged_logs(void) .update_index = 1, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .old_hash = hash1, - .new_hash = hash2, + .old_hash = { 1 }, + .new_hash = { 2 }, .name = "jane doe", .email = "jane@invalid", .message = "message1", @@ -324,7 +321,7 @@ static void test_merged_logs(void) .update_index = 3, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .new_hash = hash3, + .new_hash = { 3 }, .name = "jane doe", .email = "jane@invalid", .message = "message3", diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index 363fe0f998b91f..a6dbd214c5d2f8 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -77,18 +77,15 @@ static void write_table(char ***names, struct strbuf *buf, int N, } for (i = 0; i < N; i++) { - uint8_t hash[GIT_SHA256_RAWSZ] = { 0 }; char name[100]; int n; - set_test_hash(hash, i); - snprintf(name, sizeof(name), "refs/heads/branch%02d", i); log.refname = name; log.update_index = update_index; log.value_type = REFTABLE_LOG_UPDATE; - log.value.update.new_hash = hash; + set_test_hash(log.value.update.new_hash, i); log.value.update.message = "message"; n = reftable_writer_add_log(w, &log); @@ -137,13 +134,10 @@ static void test_log_buffer_size(void) /* This tests buffer extension for log compression. Must use a random hash, to ensure that the compressed part is larger than the original. */ - uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; for (i = 0; i < GIT_SHA1_RAWSZ; i++) { - hash1[i] = (uint8_t)(git_rand() % 256); - hash2[i] = (uint8_t)(git_rand() % 256); + log.value.update.old_hash[i] = (uint8_t)(git_rand() % 256); + log.value.update.new_hash[i] = (uint8_t)(git_rand() % 256); } - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; reftable_writer_set_limits(w, update_index, update_index); err = reftable_writer_add_log(w, &log); EXPECT_ERR(err); @@ -161,25 +155,26 @@ static void test_log_overflow(void) .block_size = ARRAY_SIZE(msg), }; int err; - struct reftable_log_record - log = { .refname = "refs/heads/master", - .update_index = 0xa, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .name = "Han-Wen Nienhuys", - .email = "hanwen@google.com", - .tz_offset = 100, - .time = 0x5e430672, - .message = msg, - } } }; + struct reftable_log_record log = { + .refname = "refs/heads/master", + .update_index = 0xa, + .value_type = REFTABLE_LOG_UPDATE, + .value = { + .update = { + .old_hash = { 1 }, + .new_hash = { 2 }, + .name = "Han-Wen Nienhuys", + .email = "hanwen@google.com", + .tz_offset = 100, + .time = 0x5e430672, + .message = msg, + }, + }, + }; struct reftable_writer *w = reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - uint8_t hash1[GIT_SHA1_RAWSZ] = {1}, hash2[GIT_SHA1_RAWSZ] = { 2 }; - memset(msg, 'x', sizeof(msg) - 1); - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; reftable_writer_set_limits(w, update_index, update_index); err = reftable_writer_add_log(w, &log); EXPECT(err == REFTABLE_ENTRY_TOO_BIG_ERROR); @@ -219,16 +214,13 @@ static void test_log_write_read(void) EXPECT_ERR(err); } for (i = 0; i < N; i++) { - uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; struct reftable_log_record log = { NULL }; - set_test_hash(hash1, i); - set_test_hash(hash2, i + 1); log.refname = names[i]; log.update_index = i; log.value_type = REFTABLE_LOG_UPDATE; - log.value.update.old_hash = hash1; - log.value.update.new_hash = hash2; + set_test_hash(log.value.update.old_hash, i); + set_test_hash(log.value.update.new_hash, i + 1); err = reftable_writer_add_log(w, &log); EXPECT_ERR(err); @@ -298,18 +290,15 @@ static void test_log_zlib_corruption(void) struct reftable_writer *w = reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); const struct reftable_stats *stats = NULL; - uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 }; - uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 }; char message[100] = { 0 }; int err, i, n; - struct reftable_log_record log = { .refname = "refname", .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .new_hash = hash1, - .old_hash = hash2, + .new_hash = { 1 }, + .old_hash = { 2 }, .name = "My Name", .email = "myname@invalid", .message = message, @@ -821,13 +810,12 @@ static void test_write_multiple_indices(void) } for (i = 0; i < 100; i++) { - unsigned char hash[GIT_SHA1_RAWSZ] = {i}; struct reftable_log_record log = { .update_index = 1, .value_type = REFTABLE_LOG_UPDATE, .value.update = { - .old_hash = hash, - .new_hash = hash, + .old_hash = { i }, + .new_hash = { i }, }, }; diff --git a/reftable/record.c b/reftable/record.c index 367de046006b25..8d2cd6b081e5e3 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -763,16 +763,10 @@ static void reftable_log_record_copy_from(void *rec, const void *src_rec, xstrdup(dst->value.update.message); } - if (dst->value.update.new_hash) { - REFTABLE_ALLOC_ARRAY(dst->value.update.new_hash, hash_size); - memcpy(dst->value.update.new_hash, - src->value.update.new_hash, hash_size); - } - if (dst->value.update.old_hash) { - REFTABLE_ALLOC_ARRAY(dst->value.update.old_hash, hash_size); - memcpy(dst->value.update.old_hash, - src->value.update.old_hash, hash_size); - } + memcpy(dst->value.update.new_hash, + src->value.update.new_hash, hash_size); + memcpy(dst->value.update.old_hash, + src->value.update.old_hash, hash_size); break; } } @@ -790,8 +784,6 @@ void reftable_log_record_release(struct reftable_log_record *r) case REFTABLE_LOG_DELETION: break; case REFTABLE_LOG_UPDATE: - reftable_free(r->value.update.new_hash); - reftable_free(r->value.update.old_hash); reftable_free(r->value.update.name); reftable_free(r->value.update.email); reftable_free(r->value.update.message); @@ -808,33 +800,20 @@ static uint8_t reftable_log_record_val_type(const void *rec) return reftable_log_record_is_deletion(log) ? 0 : 1; } -static uint8_t zero[GIT_SHA256_RAWSZ] = { 0 }; - static int reftable_log_record_encode(const void *rec, struct string_view s, int hash_size) { const struct reftable_log_record *r = rec; struct string_view start = s; int n = 0; - uint8_t *oldh = NULL; - uint8_t *newh = NULL; if (reftable_log_record_is_deletion(r)) return 0; - oldh = r->value.update.old_hash; - newh = r->value.update.new_hash; - if (!oldh) { - oldh = zero; - } - if (!newh) { - newh = zero; - } - if (s.len < 2 * hash_size) return -1; - memcpy(s.buf, oldh, hash_size); - memcpy(s.buf + hash_size, newh, hash_size); + memcpy(s.buf, r->value.update.old_hash, hash_size); + memcpy(s.buf + hash_size, r->value.update.new_hash, hash_size); string_view_consume(&s, 2 * hash_size); n = encode_string(r->value.update.name ? r->value.update.name : "", s); @@ -891,8 +870,6 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (val_type != r->value_type) { switch (r->value_type) { case REFTABLE_LOG_UPDATE: - FREE_AND_NULL(r->value.update.old_hash); - FREE_AND_NULL(r->value.update.new_hash); FREE_AND_NULL(r->value.update.message); FREE_AND_NULL(r->value.update.email); FREE_AND_NULL(r->value.update.name); @@ -909,11 +886,6 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (in.len < 2 * hash_size) return REFTABLE_FORMAT_ERROR; - r->value.update.old_hash = - reftable_realloc(r->value.update.old_hash, hash_size); - r->value.update.new_hash = - reftable_realloc(r->value.update.new_hash, hash_size); - memcpy(r->value.update.old_hash, in.buf, hash_size); memcpy(r->value.update.new_hash, in.buf + hash_size, hash_size); @@ -983,17 +955,6 @@ static int null_streq(char *a, char *b) return 0 == strcmp(a, b); } -static int zero_hash_eq(uint8_t *a, uint8_t *b, int sz) -{ - if (!a) - a = zero; - - if (!b) - b = zero; - - return !memcmp(a, b, sz); -} - static int reftable_log_record_equal_void(const void *a, const void *b, int hash_size) { @@ -1037,10 +998,10 @@ int reftable_log_record_equal(const struct reftable_log_record *a, b->value.update.email) && null_streq(a->value.update.message, b->value.update.message) && - zero_hash_eq(a->value.update.old_hash, - b->value.update.old_hash, hash_size) && - zero_hash_eq(a->value.update.new_hash, - b->value.update.new_hash, hash_size); + !memcmp(a->value.update.old_hash, + b->value.update.old_hash, hash_size) && + !memcmp(a->value.update.new_hash, + b->value.update.new_hash, hash_size); } abort(); diff --git a/reftable/record_test.c b/reftable/record_test.c index 89209894d84395..57e56138c0cf69 100644 --- a/reftable/record_test.c +++ b/reftable/record_test.c @@ -183,8 +183,6 @@ static void test_reftable_log_record_roundtrip(void) .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .old_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .new_hash = reftable_malloc(GIT_SHA1_RAWSZ), .name = xstrdup("han-wen"), .email = xstrdup("hanwen@google.com"), .message = xstrdup("test"), @@ -202,13 +200,6 @@ static void test_reftable_log_record_roundtrip(void) .refname = xstrdup("branch"), .update_index = 33, .value_type = REFTABLE_LOG_UPDATE, - .value = { - .update = { - .old_hash = reftable_malloc(GIT_SHA1_RAWSZ), - .new_hash = reftable_malloc(GIT_SHA1_RAWSZ), - /* rest of fields left empty. */ - }, - }, } }; set_test_hash(in[0].value.update.new_hash, 1); @@ -231,8 +222,6 @@ static void test_reftable_log_record_roundtrip(void) .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .new_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1), - .old_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1), .name = xstrdup("old name"), .email = xstrdup("old@email"), .message = xstrdup("old message"), diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index e657001d42fc9e..2659ea008ca460 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -88,8 +88,8 @@ struct reftable_log_record { union { struct { - uint8_t *new_hash; - uint8_t *old_hash; + unsigned char new_hash[GIT_MAX_RAWSZ]; + unsigned char old_hash[GIT_MAX_RAWSZ]; char *name; char *email; uint64_t time; diff --git a/reftable/stack_test.c b/reftable/stack_test.c index 509f4866236024..7336757cf53405 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -468,8 +468,6 @@ static void test_reftable_stack_add(void) logs[i].refname = xstrdup(buf); logs[i].update_index = N + i + 1; logs[i].value_type = REFTABLE_LOG_UPDATE; - - logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ); logs[i].value.update.email = xstrdup("identity@invalid"); set_test_hash(logs[i].value.update.new_hash, i); } @@ -547,16 +545,17 @@ static void test_reftable_stack_log_normalize(void) }; struct reftable_stack *st = NULL; char *dir = get_tmp_dir(__LINE__); - - uint8_t h1[GIT_SHA1_RAWSZ] = { 0x01 }, h2[GIT_SHA1_RAWSZ] = { 0x02 }; - - struct reftable_log_record input = { .refname = "branch", - .update_index = 1, - .value_type = REFTABLE_LOG_UPDATE, - .value = { .update = { - .new_hash = h1, - .old_hash = h2, - } } }; + struct reftable_log_record input = { + .refname = "branch", + .update_index = 1, + .value_type = REFTABLE_LOG_UPDATE, + .value = { + .update = { + .new_hash = { 1 }, + .old_hash = { 2 }, + }, + }, + }; struct reftable_log_record dest = { .update_index = 0, }; @@ -627,8 +626,6 @@ static void test_reftable_stack_tombstone(void) logs[i].update_index = 42; if (i % 2 == 0) { logs[i].value_type = REFTABLE_LOG_UPDATE; - logs[i].value.update.new_hash = - reftable_malloc(GIT_SHA1_RAWSZ); set_test_hash(logs[i].value.update.new_hash, i); logs[i].value.update.email = xstrdup("identity@invalid"); @@ -810,7 +807,6 @@ static void test_reflog_expire(void) logs[i].update_index = i; logs[i].value_type = REFTABLE_LOG_UPDATE; logs[i].value.update.time = i; - logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ); logs[i].value.update.email = xstrdup("identity@invalid"); set_test_hash(logs[i].value.update.new_hash, i); } From 01639ec148f3b5ecd4460a2a5720f987c0b6a247 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Mar 2024 13:11:03 +0100 Subject: [PATCH 3/7] reftable/record: avoid copying author info Each reflog entry contains information regarding the authorship of who has made the change. This authorship information is not the same as that of any of the commits that the reflog entry references, but instead corresponds to the local user that has executed the command. Thus, it is almost always the case that all reflog entries have the same author. We can make use of this fact when decoding reftable records: instead of freeing and then reallocating the authorship information of log records, we can special-case when the next record during an iteration has the exact same authorship as the preceding record. If so, then there is no need to reallocate the respective fields. This change results in two allocations less per log record that we're iterating over in the most common case. Before: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated After: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 4,068,487 allocs, 4,068,365 frees, 332,011,793 bytes allocated An alternative would be to store the capacity of both name and email and then use `REFTABLE_ALLOC_GROW()` to conditionally reallocate the array. But reftable records are copied around quite a lot, and thus we need to be a bit mindful of the overall record size. Furthermore, a memory comparison should also be more efficient than having to copy over memory even if we wouldn't have to allocate a new array every time. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index 8d2cd6b081e5e3..d816de6d9333af 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -896,10 +896,19 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, goto done; string_view_consume(&in, n); - r->value.update.name = - reftable_realloc(r->value.update.name, dest.len + 1); - memcpy(r->value.update.name, dest.buf, dest.len); - r->value.update.name[dest.len] = 0; + /* + * In almost all cases we can expect the reflog name to not change for + * reflog entries as they are tied to the local identity, not to the + * target commits. As an optimization for this common case we can thus + * skip copying over the name in case it's accurate already. + */ + if (!r->value.update.name || + strcmp(r->value.update.name, dest.buf)) { + r->value.update.name = + reftable_realloc(r->value.update.name, dest.len + 1); + memcpy(r->value.update.name, dest.buf, dest.len); + r->value.update.name[dest.len] = 0; + } strbuf_reset(&dest); n = decode_string(&dest, in); @@ -907,10 +916,14 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, goto done; string_view_consume(&in, n); - r->value.update.email = - reftable_realloc(r->value.update.email, dest.len + 1); - memcpy(r->value.update.email, dest.buf, dest.len); - r->value.update.email[dest.len] = 0; + /* Same as above, but for the reflog email. */ + if (!r->value.update.email || + strcmp(r->value.update.email, dest.buf)) { + r->value.update.email = + reftable_realloc(r->value.update.email, dest.len + 1); + memcpy(r->value.update.email, dest.buf, dest.len); + r->value.update.email[dest.len] = 0; + } ts = 0; n = get_var_int(&ts, &in); From 193fcb3ff82184c3c7b7e3d0da9d0b7aaa648b91 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Mar 2024 13:11:07 +0100 Subject: [PATCH 4/7] reftable/record: reuse refnames when decoding log records When decoding a log record we always reallocate their refname arrays. This results in quite a lot of needless allocation churn. Refactor the code to grow the array as required only. Like this, we should usually only end up reallocating the array a small handful of times when iterating over many refs. Before: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 4,068,487 allocs, 4,068,365 frees, 332,011,793 bytes allocated After: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 3,068,488 allocs, 3,068,366 frees, 307,122,961 bytes allocated Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 2 +- reftable/reftable-record.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/reftable/record.c b/reftable/record.c index d816de6d9333af..82780b2d06fa21 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -861,7 +861,7 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, if (key.len <= 9 || key.buf[key.len - 9] != 0) return REFTABLE_FORMAT_ERROR; - r->refname = reftable_realloc(r->refname, key.len - 8); + REFTABLE_ALLOC_GROW(r->refname, key.len - 8, r->refname_cap); memcpy(r->refname, key.buf, key.len - 8); ts = get_be64(key.buf + key.len - 8); diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index 2659ea008ca460..d28f38175cd472 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -74,6 +74,7 @@ int reftable_ref_record_equal(const struct reftable_ref_record *a, /* reftable_log_record holds a reflog entry */ struct reftable_log_record { char *refname; + size_t refname_cap; uint64_t update_index; /* logical timestamp of a transactional update. */ From e0bd13beea95fa26fa1159a26051f21237f45088 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Mar 2024 13:11:12 +0100 Subject: [PATCH 5/7] reftable/record: reuse message when decoding log records Same as the preceding commit we can allocate log messages as needed when decoding log records, thus further reducing the number of allocations. Before: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 3,068,488 allocs, 3,068,366 frees, 307,122,961 bytes allocated After: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 5 +++-- reftable/reftable-record.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index 82780b2d06fa21..7c868775860082 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -871,6 +871,7 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, switch (r->value_type) { case REFTABLE_LOG_UPDATE: FREE_AND_NULL(r->value.update.message); + r->value.update.message_cap = 0; FREE_AND_NULL(r->value.update.email); FREE_AND_NULL(r->value.update.name); break; @@ -943,8 +944,8 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, goto done; string_view_consume(&in, n); - r->value.update.message = - reftable_realloc(r->value.update.message, dest.len + 1); + REFTABLE_ALLOC_GROW(r->value.update.message, dest.len + 1, + r->value.update.message_cap); memcpy(r->value.update.message, dest.buf, dest.len); r->value.update.message[dest.len] = 0; diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index d28f38175cd472..2a2943cd134ceb 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -96,6 +96,7 @@ struct reftable_log_record { uint64_t time; int16_t tz_offset; char *message; + size_t message_cap; } update; } value; }; From 7b8abc4d8cd428e4ce044e50f7980baacaccb761 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Mar 2024 13:11:16 +0100 Subject: [PATCH 6/7] reftable/record: use scratch buffer when decoding records When decoding log records we need a temporary buffer to decode the reflog entry's name, mail address and message. As this buffer is local to the function we thus have to reallocate it for every single log record which we're about to decode, which is inefficient. Refactor the code such that callers need to pass in a scratch buffer, which allows us to reuse it for multiple decodes. This reduces the number of allocations when iterating through reflogs. Before: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated After: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated Note that this commit also drop some redundant calls to `strbuf_reset()` right before calling `decode_string()`. The latter already knows to reset the buffer, so there is no need for these. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 4 ++- reftable/block.h | 2 ++ reftable/record.c | 52 ++++++++++++++++++-------------------- reftable/record.h | 5 ++-- reftable/record_test.c | 57 ++++++++++++++++++++++++++---------------- 5 files changed, 68 insertions(+), 52 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index ad9074dba6121e..b67300a7340ccc 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -332,7 +332,8 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) return REFTABLE_FORMAT_ERROR; string_view_consume(&in, n); - n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size); + n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size, + &it->scratch); if (n < 0) return -1; string_view_consume(&in, n); @@ -369,6 +370,7 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want) void block_iter_close(struct block_iter *it) { strbuf_release(&it->last_key); + strbuf_release(&it->scratch); } int block_reader_seek(struct block_reader *br, struct block_iter *it, diff --git a/reftable/block.h b/reftable/block.h index 51699af233d823..47acc62c0ab8cd 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -84,10 +84,12 @@ struct block_iter { /* key for last entry we read. */ struct strbuf last_key; + struct strbuf scratch; }; #define BLOCK_ITER_INIT { \ .last_key = STRBUF_INIT, \ + .scratch = STRBUF_INIT, \ } /* initializes a block reader. */ diff --git a/reftable/record.c b/reftable/record.c index 7c868775860082..060244337f5617 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -374,7 +374,7 @@ static int reftable_ref_record_encode(const void *rec, struct string_view s, static int reftable_ref_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch) { struct reftable_ref_record *r = rec; struct string_view start = in; @@ -425,13 +425,12 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, break; case REFTABLE_REF_SYMREF: { - struct strbuf dest = STRBUF_INIT; - int n = decode_string(&dest, in); + int n = decode_string(scratch, in); if (n < 0) { return -1; } string_view_consume(&in, n); - r->value.symref = dest.buf; + r->value.symref = strbuf_detach(scratch, NULL); } break; case REFTABLE_REF_DELETION: @@ -579,7 +578,7 @@ static int reftable_obj_record_encode(const void *rec, struct string_view s, static int reftable_obj_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch UNUSED) { struct string_view start = in; struct reftable_obj_record *r = rec; @@ -849,13 +848,12 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, static int reftable_log_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch) { struct string_view start = in; struct reftable_log_record *r = rec; uint64_t max = 0; uint64_t ts = 0; - struct strbuf dest = STRBUF_INIT; int n; if (key.len <= 9 || key.buf[key.len - 9] != 0) @@ -892,7 +890,7 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, string_view_consume(&in, 2 * hash_size); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); @@ -904,26 +902,25 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, * skip copying over the name in case it's accurate already. */ if (!r->value.update.name || - strcmp(r->value.update.name, dest.buf)) { + strcmp(r->value.update.name, scratch->buf)) { r->value.update.name = - reftable_realloc(r->value.update.name, dest.len + 1); - memcpy(r->value.update.name, dest.buf, dest.len); - r->value.update.name[dest.len] = 0; + reftable_realloc(r->value.update.name, scratch->len + 1); + memcpy(r->value.update.name, scratch->buf, scratch->len); + r->value.update.name[scratch->len] = 0; } - strbuf_reset(&dest); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); /* Same as above, but for the reflog email. */ if (!r->value.update.email || - strcmp(r->value.update.email, dest.buf)) { + strcmp(r->value.update.email, scratch->buf)) { r->value.update.email = - reftable_realloc(r->value.update.email, dest.len + 1); - memcpy(r->value.update.email, dest.buf, dest.len); - r->value.update.email[dest.len] = 0; + reftable_realloc(r->value.update.email, scratch->len + 1); + memcpy(r->value.update.email, scratch->buf, scratch->len); + r->value.update.email[scratch->len] = 0; } ts = 0; @@ -938,22 +935,19 @@ static int reftable_log_record_decode(void *rec, struct strbuf key, r->value.update.tz_offset = get_be16(in.buf); string_view_consume(&in, 2); - strbuf_reset(&dest); - n = decode_string(&dest, in); + n = decode_string(scratch, in); if (n < 0) goto done; string_view_consume(&in, n); - REFTABLE_ALLOC_GROW(r->value.update.message, dest.len + 1, + REFTABLE_ALLOC_GROW(r->value.update.message, scratch->len + 1, r->value.update.message_cap); - memcpy(r->value.update.message, dest.buf, dest.len); - r->value.update.message[dest.len] = 0; + memcpy(r->value.update.message, scratch->buf, scratch->len); + r->value.update.message[scratch->len] = 0; - strbuf_release(&dest); return start.len - in.len; done: - strbuf_release(&dest); return REFTABLE_FORMAT_ERROR; } @@ -1093,7 +1087,7 @@ static int reftable_index_record_encode(const void *rec, struct string_view out, static int reftable_index_record_decode(void *rec, struct strbuf key, uint8_t val_type, struct string_view in, - int hash_size) + int hash_size, struct strbuf *scratch UNUSED) { struct string_view start = in; struct reftable_index_record *r = rec; @@ -1174,10 +1168,12 @@ uint8_t reftable_record_val_type(struct reftable_record *rec) } int reftable_record_decode(struct reftable_record *rec, struct strbuf key, - uint8_t extra, struct string_view src, int hash_size) + uint8_t extra, struct string_view src, int hash_size, + struct strbuf *scratch) { return reftable_record_vtable(rec)->decode(reftable_record_data(rec), - key, extra, src, hash_size); + key, extra, src, hash_size, + scratch); } void reftable_record_release(struct reftable_record *rec) diff --git a/reftable/record.h b/reftable/record.h index 5e8304e05284c7..826ee1c55c3b64 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -55,7 +55,8 @@ struct reftable_record_vtable { /* decode data from `src` into the record. */ int (*decode)(void *rec, struct strbuf key, uint8_t extra, - struct string_view src, int hash_size); + struct string_view src, int hash_size, + struct strbuf *scratch); /* deallocate and null the record. */ void (*release)(void *rec); @@ -138,7 +139,7 @@ int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size); int reftable_record_decode(struct reftable_record *rec, struct strbuf key, uint8_t extra, struct string_view src, - int hash_size); + int hash_size, struct strbuf *scratch); int reftable_record_is_deletion(struct reftable_record *rec); static inline uint8_t reftable_record_type(struct reftable_record *rec) diff --git a/reftable/record_test.c b/reftable/record_test.c index 57e56138c0cf69..c158ee79ffe36a 100644 --- a/reftable/record_test.c +++ b/reftable/record_test.c @@ -99,6 +99,7 @@ static void set_hash(uint8_t *h, int j) static void test_reftable_ref_record_roundtrip(void) { + struct strbuf scratch = STRBUF_INIT; int i = 0; for (i = REFTABLE_REF_DELETION; i < REFTABLE_NR_REF_VALUETYPES; i++) { @@ -140,7 +141,7 @@ static void test_reftable_ref_record_roundtrip(void) EXPECT(n > 0); /* decode into a non-zero reftable_record to test for leaks. */ - m = reftable_record_decode(&out, key, i, dest, GIT_SHA1_RAWSZ); + m = reftable_record_decode(&out, key, i, dest, GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_ref_record_equal(&in.u.ref, &out.u.ref, @@ -150,6 +151,8 @@ static void test_reftable_ref_record_roundtrip(void) strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_reftable_log_record_equal(void) @@ -175,7 +178,6 @@ static void test_reftable_log_record_equal(void) static void test_reftable_log_record_roundtrip(void) { int i; - struct reftable_log_record in[] = { { .refname = xstrdup("refs/heads/master"), @@ -202,6 +204,8 @@ static void test_reftable_log_record_roundtrip(void) .value_type = REFTABLE_LOG_UPDATE, } }; + struct strbuf scratch = STRBUF_INIT; + set_test_hash(in[0].value.update.new_hash, 1); set_test_hash(in[0].value.update.old_hash, 2); set_test_hash(in[2].value.update.new_hash, 3); @@ -241,7 +245,7 @@ static void test_reftable_log_record_roundtrip(void) EXPECT(n >= 0); valtype = reftable_record_val_type(&rec); m = reftable_record_decode(&out, key, valtype, dest, - GIT_SHA1_RAWSZ); + GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_log_record_equal(&in[i], &out.u.log, @@ -250,6 +254,8 @@ static void test_reftable_log_record_roundtrip(void) strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_u24_roundtrip(void) @@ -299,23 +305,27 @@ static void test_reftable_obj_record_roundtrip(void) { uint8_t testHash1[GIT_SHA1_RAWSZ] = { 1, 2, 3, 4, 0 }; uint64_t till9[] = { 1, 2, 3, 4, 500, 600, 700, 800, 9000 }; - struct reftable_obj_record recs[3] = { { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - .offsets = till9, - .offset_len = 3, - }, - { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - .offsets = till9, - .offset_len = 9, - }, - { - .hash_prefix = testHash1, - .hash_prefix_len = 5, - } }; + struct reftable_obj_record recs[3] = { + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + .offsets = till9, + .offset_len = 3, + }, + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + .offsets = till9, + .offset_len = 9, + }, + { + .hash_prefix = testHash1, + .hash_prefix_len = 5, + }, + }; + struct strbuf scratch = STRBUF_INIT; int i = 0; + for (i = 0; i < ARRAY_SIZE(recs); i++) { uint8_t buffer[1024] = { 0 }; struct string_view dest = { @@ -339,13 +349,15 @@ static void test_reftable_obj_record_roundtrip(void) EXPECT(n > 0); extra = reftable_record_val_type(&in); m = reftable_record_decode(&out, key, extra, dest, - GIT_SHA1_RAWSZ); + GIT_SHA1_RAWSZ, &scratch); EXPECT(n == m); EXPECT(reftable_record_equal(&in, &out, GIT_SHA1_RAWSZ)); strbuf_release(&key); reftable_record_release(&out); } + + strbuf_release(&scratch); } static void test_reftable_index_record_roundtrip(void) @@ -362,6 +374,7 @@ static void test_reftable_index_record_roundtrip(void) .buf = buffer, .len = sizeof(buffer), }; + struct strbuf scratch = STRBUF_INIT; struct strbuf key = STRBUF_INIT; struct reftable_record out = { .type = BLOCK_TYPE_INDEX, @@ -379,13 +392,15 @@ static void test_reftable_index_record_roundtrip(void) EXPECT(n > 0); extra = reftable_record_val_type(&in); - m = reftable_record_decode(&out, key, extra, dest, GIT_SHA1_RAWSZ); + m = reftable_record_decode(&out, key, extra, dest, GIT_SHA1_RAWSZ, + &scratch); EXPECT(m == n); EXPECT(reftable_record_equal(&in, &out, GIT_SHA1_RAWSZ)); reftable_record_release(&out); strbuf_release(&key); + strbuf_release(&scratch); strbuf_release(&in.u.idx.last_key); } From fcacc2b161b095c99dfd4e0b05dcc1ed8ca80a62 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Mar 2024 13:11:20 +0100 Subject: [PATCH 7/7] refs/reftable: track last log record name via strbuf The reflog iterator enumerates all reflogs known to a ref backend. In the "reftable" backend there is no way to list all existing reflogs directly. Instead, we have to iterate through all reflog entries and discard all those redundant entries for which we have already returned a reflog entry. This logic is implemented by tracking the last reflog name that we have emitted to the iterator's user. If the next log record has the same name we simply skip it until we find another record with a different refname. This last reflog name is stored in a simple C string, which requires us to free and reallocate it whenever we need to update the reflog name. Convert it to use a `struct strbuf` instead, which reduces the number of allocations. Before: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated After: HEAP SUMMARY: in use at exit: 13,473 bytes in 122 blocks total heap usage: 68,485 allocs, 68,363 frees, 256,234,072 bytes allocated Note that even after this change we still allocate quite a lot of data, even though the number of allocations does not scale with the number of log records anymore. This remainder comes mostly from decompressing the log blocks, where we decompress each block into newly allocated memory. This will be addressed at a later point in time. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 2de57c047a25ab..12960d93ff47c7 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1575,7 +1575,7 @@ struct reftable_reflog_iterator { struct reftable_ref_store *refs; struct reftable_iterator iter; struct reftable_log_record log; - char *last_name; + struct strbuf last_name; int err; }; @@ -1594,15 +1594,15 @@ static int reftable_reflog_iterator_advance(struct ref_iterator *ref_iterator) * we've already produced this name. This could be faster by * seeking directly to reflog@update_index==0. */ - if (iter->last_name && !strcmp(iter->log.refname, iter->last_name)) + if (!strcmp(iter->log.refname, iter->last_name.buf)) continue; if (check_refname_format(iter->log.refname, REFNAME_ALLOW_ONELEVEL)) continue; - free(iter->last_name); - iter->last_name = xstrdup(iter->log.refname); + strbuf_reset(&iter->last_name); + strbuf_addstr(&iter->last_name, iter->log.refname); iter->base.refname = iter->log.refname; break; @@ -1635,7 +1635,7 @@ static int reftable_reflog_iterator_abort(struct ref_iterator *ref_iterator) (struct reftable_reflog_iterator *)ref_iterator; reftable_log_record_release(&iter->log); reftable_iterator_destroy(&iter->iter); - free(iter->last_name); + strbuf_release(&iter->last_name); free(iter); return ITER_DONE; } @@ -1655,6 +1655,7 @@ static struct reftable_reflog_iterator *reflog_iterator_for_stack(struct reftabl iter = xcalloc(1, sizeof(*iter)); base_ref_iterator_init(&iter->base, &reftable_reflog_iterator_vtable); + strbuf_init(&iter->last_name, 0); iter->refs = refs; ret = refs->err;