From b970509c59f3d5d684129dec9237ceb2e3084e1d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 12 Nov 2024 03:34:00 -0500 Subject: [PATCH 1/3] fetch: adjust refspec->raw_nr when filtering prefetch refspecs In filter_prefetch_refspecs(), we may remove one or more refspecs if they point into refs/tags/. When we do, we remove the item from the refspec->items array, shifting subsequent items down, and then decrement the refspec->nr count. We also remove the item from the refspec->raw array, but fail to decrement refspec->raw_nr. This leaves us with a count that is too high, and anybody looking at the "raw" array will erroneously see either: 1. The removed entry, if there were no subsequent items to shift down. 2. A duplicate of the final entry, as everything is shifted down but there was nothing to overwrite the final item. The obvious culprit to run into this is calling refspec_clear(), which will try to free the removed entry (case 1) or double-free the final entry (case 2). But even though the bug has existed since the function was added in 2e03115d0c (fetch: add --prefetch option, 2021-04-16), we did not trigger it in the test suite. The --prefetch option is normally only used with configured refspecs, and we never bother to call refspec_clear() on those (they are stored as part of a struct remote, which is held in a global variable). But you could trigger case 2 manually like: git fetch --prefetch . refs/tags/foo refs/tags/bar Ironically you couldn't trigger case 1, because the code accidentally leaked the string in the raw array, and the two bugs (the leak and the double-free) cancelled out. But when we fixed the leak in ea4780307c (fetch: free "raw" string when shrinking refspec, 2024-09-24), it became possible to trigger that, too, with a single item: git fetch --prefetch . refs/tags/foo We can fix both cases by just correctly decrementing "raw_nr" when we shrink the array. Even though we don't expect people to use --prefetch with command-line refspecs, we'll add a test to make sure it behaves well (like the test just before it, we're just confirming that the filtered prefetch succeeds at all). Reported-by: Eric Mills Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 1 + t/t5582-fetch-negative-refspec.sh | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/builtin/fetch.c b/builtin/fetch.c index 80a64d0d269ed6..1c59e1c256c151 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -463,6 +463,7 @@ static void filter_prefetch_refspec(struct refspec *rs) rs->raw[j - 1] = rs->raw[j]; } rs->nr--; + rs->raw_nr--; i--; continue; } diff --git a/t/t5582-fetch-negative-refspec.sh b/t/t5582-fetch-negative-refspec.sh index 7fa54a4029ad44..b21bf2057d60fe 100755 --- a/t/t5582-fetch-negative-refspec.sh +++ b/t/t5582-fetch-negative-refspec.sh @@ -283,4 +283,8 @@ test_expect_success '--prefetch succeeds when refspec becomes empty' ' git -C one fetch --prefetch ' +test_expect_success '--prefetch succeeds with empty command line refspec' ' + git -C one fetch --prefetch origin +refs/tags/extra +' + test_done From d36af33081cac89a840f4a4cbc4af1ba4aa40392 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 12 Nov 2024 03:36:10 -0500 Subject: [PATCH 2/3] refspec: drop separate raw_nr count A refspec struct contains zero or more refspec_item structs, along with matching "raw" strings. The items and raw strings are kept in separate arrays, but those arrays will always have the same length (because we write them only via refspec_append_nodup(), which grows both). This can lead to bugs when manipulating the array, since the arrays and lengths must be modified in lockstep. For example, the bug fixed in the previous commit, which forgot to decrement raw_nr. So let's get rid of "raw_nr" and have only "nr", making this kind of bug impossible (and also making it clear that the two are always matched, something that existing code already assumed but was not guaranteed by the interface). Even though we'd expect "alloc" and "raw_alloc" to likewise move in lockstep, we still need to keep separate counts there if we want to continue to use ALLOC_GROW() for both. Conceptually this would all be simpler if refspec_item just held onto its own raw string, and we had a single array. But there are callers which use refspec_item outside of "struct refspec" (and so don't hold on to a matching "raw" string at all), which we'd possibly need to adjust. So let's not worry about refactoring that for now, and just get rid of the redundant count variable. That is the first step on the road to combining them anyway. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 1 - builtin/remote.c | 8 ++++---- refspec.c | 15 ++++++++------- refspec.h | 1 - submodule.c | 4 ++-- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 1c59e1c256c151..80a64d0d269ed6 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -463,7 +463,6 @@ static void filter_prefetch_refspec(struct refspec *rs) rs->raw[j - 1] = rs->raw[j]; } rs->nr--; - rs->raw_nr--; i--; continue; } diff --git a/builtin/remote.c b/builtin/remote.c index 76670ddd8b44e5..875d6c3badcf72 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -633,11 +633,11 @@ static int migrate_file(struct remote *remote) git_config_set_multivar(buf.buf, remote->url.v[i], "^$", 0); strbuf_reset(&buf); strbuf_addf(&buf, "remote.%s.push", remote->name); - for (i = 0; i < remote->push.raw_nr; i++) + for (i = 0; i < remote->push.nr; i++) git_config_set_multivar(buf.buf, remote->push.raw[i], "^$", 0); strbuf_reset(&buf); strbuf_addf(&buf, "remote.%s.fetch", remote->name); - for (i = 0; i < remote->fetch.raw_nr; i++) + for (i = 0; i < remote->fetch.nr; i++) git_config_set_multivar(buf.buf, remote->fetch.raw[i], "^$", 0); if (remote->origin == REMOTE_REMOTES) unlink_or_warn(git_path("remotes/%s", remote->name)); @@ -759,12 +759,12 @@ static int mv(int argc, const char **argv, const char *prefix) goto out; } - if (oldremote->fetch.raw_nr) { + if (oldremote->fetch.nr) { strbuf_reset(&buf); strbuf_addf(&buf, "remote.%s.fetch", rename.new_name); git_config_set_multivar(buf.buf, NULL, NULL, CONFIG_FLAGS_MULTI_REPLACE); strbuf_addf(&old_remote_context, ":refs/remotes/%s/", rename.old_name); - for (i = 0; i < oldremote->fetch.raw_nr; i++) { + for (i = 0; i < oldremote->fetch.nr; i++) { char *ptr; strbuf_reset(&buf2); diff --git a/refspec.c b/refspec.c index c3cf003443dd36..8e8ee8542dd89a 100644 --- a/refspec.c +++ b/refspec.c @@ -186,10 +186,12 @@ static void refspec_append_nodup(struct refspec *rs, char *refspec) refspec_item_init_or_die(&item, refspec, rs->fetch); ALLOC_GROW(rs->items, rs->nr + 1, rs->alloc); - rs->items[rs->nr++] = item; + rs->items[rs->nr] = item; - ALLOC_GROW(rs->raw, rs->raw_nr + 1, rs->raw_alloc); - rs->raw[rs->raw_nr++] = refspec; + ALLOC_GROW(rs->raw, rs->nr + 1, rs->raw_alloc); + rs->raw[rs->nr] = refspec; + + rs->nr++; } void refspec_append(struct refspec *rs, const char *refspec) @@ -217,18 +219,17 @@ void refspec_clear(struct refspec *rs) { int i; - for (i = 0; i < rs->nr; i++) + for (i = 0; i < rs->nr; i++) { refspec_item_clear(&rs->items[i]); + free(rs->raw[i]); + } FREE_AND_NULL(rs->items); rs->alloc = 0; rs->nr = 0; - for (i = 0; i < rs->raw_nr; i++) - free(rs->raw[i]); FREE_AND_NULL(rs->raw); rs->raw_alloc = 0; - rs->raw_nr = 0; rs->fetch = 0; } diff --git a/refspec.h b/refspec.h index 3760fdaf2bb183..0461c9def6261c 100644 --- a/refspec.h +++ b/refspec.h @@ -45,7 +45,6 @@ struct refspec { char **raw; int raw_alloc; - int raw_nr; int fetch; }; diff --git a/submodule.c b/submodule.c index 74d5766f07c311..307f73fb5b13b7 100644 --- a/submodule.c +++ b/submodule.c @@ -1174,7 +1174,7 @@ static int push_submodule(const char *path, if (remote->origin != REMOTE_UNCONFIGURED) { int i; strvec_push(&cp.args, remote->name); - for (i = 0; i < rs->raw_nr; i++) + for (i = 0; i < rs->nr; i++) strvec_push(&cp.args, rs->raw[i]); } @@ -1209,7 +1209,7 @@ static void submodule_push_check(const char *path, const char *head, strvec_push(&cp.args, head); strvec_push(&cp.args, remote->name); - for (i = 0; i < rs->raw_nr; i++) + for (i = 0; i < rs->nr; i++) strvec_push(&cp.args, rs->raw[i]); prepare_submodule_repo_env(&cp.env); From fe17a25905f701ce91505851eb2bb213bb39edbe Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 12 Nov 2024 03:39:37 -0500 Subject: [PATCH 3/3] refspec: store raw refspecs inside refspec_item The refspec struct keeps two matched arrays: one for the refspec_item structs and one for the original raw refspec strings. The main reason for this is that there are other users of refspec_item that do not care about the raw strings. But it does make managing the refspec struct awkward, as we must keep the two arrays in sync. This has led to bugs in the past (both leaks and double-frees). Let's just store a copy of the raw refspec string directly in each refspec_item struct. This simplifies the handling at a small cost: 1. Direct callers of refspec_item_init() will now get an extra copy of the refspec string, even if they don't need it. This should be negligible, as the struct is already allocating two strings for the parsed src/dst values (and we tend to only do it sparingly anyway for things like the TAG_REFSPEC literal). 2. Users of refspec_appendf() will now generate a temporary string, copy it, and then free the result (versus handing off ownership of the temporary string). We could get around this by having a "nodup" variant of refspec_item_init(), but it doesn't seem worth the extra complexity for something that is not remotely a hot code path. Code which accesses refspec->raw now needs to look at refspec->item.raw. Other callers which just use refspec_item directly can remain the same. We'll free the allocated string in refspec_item_clear(), which they should be calling anyway to free src/dst. One subtle note: refspec_item_init() can return an error, in which case we'll still have set its "raw" field. But that is also true of the "src" and "dst" fields, so any caller which does not _clear() the failed item is already potentially leaking. In practice most code just calls die() on an error anyway, but you can see the exception in valid_fetch_refspec(), which does correctly call _clear() even on error. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 8 ++------ builtin/remote.c | 8 ++++---- refspec.c | 25 +++++++++---------------- refspec.h | 5 ++--- submodule.c | 4 ++-- 5 files changed, 19 insertions(+), 31 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 80a64d0d269ed6..ed273be62cad72 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -454,14 +454,10 @@ static void filter_prefetch_refspec(struct refspec *rs) ref_namespace[NAMESPACE_TAGS].ref))) { int j; - free(rs->items[i].src); - free(rs->items[i].dst); - free(rs->raw[i]); + refspec_item_clear(&rs->items[i]); - for (j = i + 1; j < rs->nr; j++) { + for (j = i + 1; j < rs->nr; j++) rs->items[j - 1] = rs->items[j]; - rs->raw[j - 1] = rs->raw[j]; - } rs->nr--; i--; continue; diff --git a/builtin/remote.c b/builtin/remote.c index 875d6c3badcf72..909360096513fc 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -377,7 +377,7 @@ static int get_ref_states(const struct ref *remote_refs, struct ref_states *stat for (i = 0; i < states->remote->fetch.nr; i++) if (get_fetch_map(remote_refs, &states->remote->fetch.items[i], &tail, 1)) die(_("Could not get fetch map for refspec %s"), - states->remote->fetch.raw[i]); + states->remote->fetch.items[i].raw); for (ref = fetch_map; ref; ref = ref->next) { if (omit_name_by_refspec(ref->name, &states->remote->fetch)) @@ -634,11 +634,11 @@ static int migrate_file(struct remote *remote) strbuf_reset(&buf); strbuf_addf(&buf, "remote.%s.push", remote->name); for (i = 0; i < remote->push.nr; i++) - git_config_set_multivar(buf.buf, remote->push.raw[i], "^$", 0); + git_config_set_multivar(buf.buf, remote->push.items[i].raw, "^$", 0); strbuf_reset(&buf); strbuf_addf(&buf, "remote.%s.fetch", remote->name); for (i = 0; i < remote->fetch.nr; i++) - git_config_set_multivar(buf.buf, remote->fetch.raw[i], "^$", 0); + git_config_set_multivar(buf.buf, remote->fetch.items[i].raw, "^$", 0); if (remote->origin == REMOTE_REMOTES) unlink_or_warn(git_path("remotes/%s", remote->name)); else if (remote->origin == REMOTE_BRANCHES) @@ -768,7 +768,7 @@ static int mv(int argc, const char **argv, const char *prefix) char *ptr; strbuf_reset(&buf2); - strbuf_addstr(&buf2, oldremote->fetch.raw[i]); + strbuf_addstr(&buf2, oldremote->fetch.items[i].raw); ptr = strstr(buf2.buf, old_remote_context.buf); if (ptr) { refspec_updated = 1; diff --git a/refspec.c b/refspec.c index 8e8ee8542dd89a..994901f55b8e37 100644 --- a/refspec.c +++ b/refspec.c @@ -153,6 +153,7 @@ static int parse_refspec(struct refspec_item *item, const char *refspec, int fet int refspec_item_init(struct refspec_item *item, const char *refspec, int fetch) { memset(item, 0, sizeof(*item)); + item->raw = xstrdup(refspec); return parse_refspec(item, refspec, fetch); } @@ -167,6 +168,7 @@ void refspec_item_clear(struct refspec_item *item) { FREE_AND_NULL(item->src); FREE_AND_NULL(item->dst); + FREE_AND_NULL(item->raw); item->force = 0; item->pattern = 0; item->matching = 0; @@ -179,7 +181,7 @@ void refspec_init(struct refspec *rs, int fetch) rs->fetch = fetch; } -static void refspec_append_nodup(struct refspec *rs, char *refspec) +void refspec_append(struct refspec *rs, const char *refspec) { struct refspec_item item; @@ -188,24 +190,20 @@ static void refspec_append_nodup(struct refspec *rs, char *refspec) ALLOC_GROW(rs->items, rs->nr + 1, rs->alloc); rs->items[rs->nr] = item; - ALLOC_GROW(rs->raw, rs->nr + 1, rs->raw_alloc); - rs->raw[rs->nr] = refspec; - rs->nr++; } -void refspec_append(struct refspec *rs, const char *refspec) -{ - refspec_append_nodup(rs, xstrdup(refspec)); -} - void refspec_appendf(struct refspec *rs, const char *fmt, ...) { va_list ap; + char *buf; va_start(ap, fmt); - refspec_append_nodup(rs, xstrvfmt(fmt, ap)); + buf = xstrvfmt(fmt, ap); va_end(ap); + + refspec_append(rs, buf); + free(buf); } void refspec_appendn(struct refspec *rs, const char **refspecs, int nr) @@ -219,18 +217,13 @@ void refspec_clear(struct refspec *rs) { int i; - for (i = 0; i < rs->nr; i++) { + for (i = 0; i < rs->nr; i++) refspec_item_clear(&rs->items[i]); - free(rs->raw[i]); - } FREE_AND_NULL(rs->items); rs->alloc = 0; rs->nr = 0; - FREE_AND_NULL(rs->raw); - rs->raw_alloc = 0; - rs->fetch = 0; } diff --git a/refspec.h b/refspec.h index 0461c9def6261c..69d693c87d31a0 100644 --- a/refspec.h +++ b/refspec.h @@ -26,6 +26,8 @@ struct refspec_item { char *src; char *dst; + + char *raw; }; #define REFSPEC_FETCH 1 @@ -43,9 +45,6 @@ struct refspec { int alloc; int nr; - char **raw; - int raw_alloc; - int fetch; }; diff --git a/submodule.c b/submodule.c index 307f73fb5b13b7..7ec564854d0339 100644 --- a/submodule.c +++ b/submodule.c @@ -1175,7 +1175,7 @@ static int push_submodule(const char *path, int i; strvec_push(&cp.args, remote->name); for (i = 0; i < rs->nr; i++) - strvec_push(&cp.args, rs->raw[i]); + strvec_push(&cp.args, rs->items[i].raw); } prepare_submodule_repo_env(&cp.env); @@ -1210,7 +1210,7 @@ static void submodule_push_check(const char *path, const char *head, strvec_push(&cp.args, remote->name); for (i = 0; i < rs->nr; i++) - strvec_push(&cp.args, rs->raw[i]); + strvec_push(&cp.args, rs->items[i].raw); prepare_submodule_repo_env(&cp.env); cp.git_cmd = 1;