diff --git a/CHANGELOG.md b/CHANGELOG.md index 20b0d59..f4a2e05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased][] +### Added + +- New methods `Git.IterateRefs` and `Git.IterateRemoteRefs` + provide a streaming API for listing refs. + +### Deprecated + +- `Git.ListRefs` and `Git.ListRefsVerbatim` are deprecated + in favor of `Git.IterateRefs`. +- `Git.ListRemoteRefs` is deprecated in favor of `Git.IterateRemoteRefs`. + ### Fixed - `Log` no longer retains data after closing. diff --git a/net.go b/net.go index c69b3e7..7726298 100644 --- a/net.go +++ b/net.go @@ -15,6 +15,8 @@ package git import ( + "bufio" + "bytes" "context" "fmt" "io" @@ -107,27 +109,79 @@ func (g *Git) clone(ctx context.Context, mode string, u *url.URL, opts CloneOpti }) } +// IterateRemoteRefsOptions specifies filters for [Git.IterateRemoteRefs]. +type IterateRemoteRefsOptions struct { + // If IncludeHead is true, the HEAD ref is included. + IncludeHead bool + // LimitToBranches limits the refs to those starting with "refs/heads/". + // This is additive with IncludeHead and LimitToTags. + LimitToBranches bool + // LimitToTags limits the refs to those starting with "refs/tags/". + // This is additive with IncludeHead and LimitToBranches. + LimitToTags bool + // If DereferenceTags is true, + // then the iterator will also produce refs for which [RefIterator.IsDereference] reports true + // whose object hash is that of the object the tag refers to. + DereferenceTags bool +} + // ListRemoteRefs lists all of the refs in a remote repository. // remote may be a URL or the name of a remote. // // This function may block on user input if the remote requires // credentials. +// +// Deprecated: This method will return an error on repositories with a very large number of refs (>100K). +// Use [Git.IterateRemoteRefs] instead. 
func (g *Git) ListRemoteRefs(ctx context.Context, remote string) (map[Ref]Hash, error) { + return parseRefs(g.IterateRemoteRefs(ctx, remote, IterateRemoteRefsOptions{ + IncludeHead: true, + })) +} + +// IterateRemoteRefs starts listing all of the refs in a remote repository. +// remote may be a URL or the name of a remote. +// +// The iterator may block on user input if the remote requires credentials. +func (g *Git) IterateRemoteRefs(ctx context.Context, remote string, opts IterateRemoteRefsOptions) *RefIterator { // TODO(someday): Add tests. errPrefix := fmt.Sprintf("git ls-remote %q", remote) - out, err := g.output(ctx, errPrefix, []string{"ls-remote", "--quiet", "--", remote}) - if err != nil { - return nil, err + args := []string{"ls-remote", "--quiet"} + if !opts.IncludeHead && !opts.DereferenceTags { + args = append(args, "--refs") } - if len(out) == 0 { - return nil, nil + if opts.LimitToBranches { + args = append(args, "--heads") } - refs, err := parseRefs(out, true) + if opts.LimitToTags { + args = append(args, "--tags") + } + args = append(args, "--", remote) + + ctx, cancel := context.WithCancel(ctx) + stderr := new(bytes.Buffer) + pipe, err := StartPipe(ctx, g.runner, &Invocation{ + Args: args, + Dir: g.dir, + Stderr: &limitWriter{w: stderr, n: errorOutputLimit}, + }) if err != nil { - return refs, fmt.Errorf("%s: %w", errPrefix, err) + cancel() + return &RefIterator{ + scanErr: fmt.Errorf("%s: %w", errPrefix, err), + scanDone: true, + } + } + return &RefIterator{ + scanner: bufio.NewScanner(pipe), + stderr: stderr, + cancelFunc: cancel, + closer: pipe, + errPrefix: errPrefix, + ignoreDerefs: !opts.DereferenceTags, + ignoreHead: !opts.IncludeHead, } - return refs, nil } // A FetchRefspec specifies a mapping from remote refs to local refs. 
diff --git a/revision.go b/revision.go index debcc70..15a941b 100644 --- a/revision.go +++ b/revision.go @@ -15,10 +15,12 @@ package git import ( + "bufio" "bytes" "context" "errors" "fmt" + "io" "strings" "gg-scm.io/pkg/git/githash" @@ -123,74 +125,234 @@ func (g *Git) ParseRev(ctx context.Context, refspec string) (*Rev, error) { } // ListRefs lists all of the refs in the repository with tags dereferenced. +// +// Deprecated: This method will return an error on repositories with many branches (>100K). +// Use [Git.IterateRefs] instead. func (g *Git) ListRefs(ctx context.Context) (map[Ref]Hash, error) { + return parseRefs(g.IterateRefs(ctx, IterateRefsOptions{ + IncludeHead: true, + DereferenceTags: true, + })) +} + +// ListRefsVerbatim lists all of the refs in the repository. +// Tags will not be dereferenced. +// +// Deprecated: This method will return an error on repositories with many branches (>100K). +// Use [Git.IterateRefs] instead. +func (g *Git) ListRefsVerbatim(ctx context.Context) (map[Ref]Hash, error) { + return parseRefs(g.IterateRefs(ctx, IterateRefsOptions{ + IncludeHead: true, + })) +} + +func parseRefs(iter *RefIterator) (map[Ref]Hash, error) { + defer iter.Close() + + const maxCount = 164000 // approximately 10 MiB, assuming 64 bytes per record + + refs := make(map[Ref]Hash) + tags := make(map[Ref]bool) + for iter.Next() { + if iter.IsDereference() { + // Dereferenced tag. This takes precedence over the previous hash stored in the map. + if tags[iter.Ref()] { + return refs, fmt.Errorf("parse refs: multiple hashes found for tag %v", iter.Ref()) + } + tags[iter.Ref()] = true + } else if _, exists := refs[iter.Ref()]; exists { + return refs, fmt.Errorf("parse refs: multiple hashes found for %v", iter.Ref()) + } else if len(refs) >= maxCount { + return refs, fmt.Errorf("parse refs: too many refs") + } + refs[iter.Ref()] = iter.ObjectSHA1() + } + return refs, iter.Close() +} + +// IterateRefsOptions specifies filters for [Git.IterateRefs]. 
+type IterateRefsOptions struct { + // If IncludeHead is true, the HEAD ref is included. + IncludeHead bool + // LimitToBranches limits the refs to those starting with "refs/heads/". + // This is additive with IncludeHead and LimitToTags. + LimitToBranches bool + // LimitToTags limits the refs to those starting with "refs/tags/". + // This is additive with IncludeHead and LimitToBranches. + LimitToTags bool + // If DereferenceTags is true, + // then the iterator will also produce refs for which [RefIterator.IsDereference] reports true + // that have the object hash of the tag object refers to. + DereferenceTags bool +} + +// IterateRefs starts listing all of the refs in the repository. +func (g *Git) IterateRefs(ctx context.Context, opts IterateRefsOptions) *RefIterator { const errPrefix = "git show-ref" - out, err := g.output(ctx, errPrefix, []string{"show-ref", "--dereference", "--head"}) + args := []string{"show-ref"} + if opts.IncludeHead { + args = append(args, "--head") + } + if opts.LimitToBranches { + args = append(args, "--heads") + } + if opts.LimitToTags { + args = append(args, "--tags") + } + if opts.DereferenceTags { + args = append(args, "--dereference") + } + + ctx, cancel := context.WithCancel(ctx) + stderr := new(bytes.Buffer) + pipe, err := StartPipe(ctx, g.runner, &Invocation{ + Args: args, + Dir: g.dir, + Stderr: &limitWriter{w: stderr, n: errorOutputLimit}, + }) if err != nil { - if exitCode(err) == 1 && len(out) == 0 { - return nil, nil + cancel() + return &RefIterator{ + scanErr: fmt.Errorf("%s: %w", errPrefix, err), + scanDone: true, } - return nil, err } - refs, err := parseRefs(out, false) - if err != nil { - return refs, fmt.Errorf("%s: %w", errPrefix, err) + return &RefIterator{ + scanner: bufio.NewScanner(pipe), + stderr: stderr, + cancelFunc: cancel, + closer: pipe, + errPrefix: errPrefix, + ignoreDerefs: !opts.DereferenceTags, + ignoreHead: !opts.IncludeHead, + ignoreExit1: true, } - return refs, nil } -// ListRefsVerbatim lists all 
of the refs in the repository. Tags will not be -// dereferenced. -func (g *Git) ListRefsVerbatim(ctx context.Context) (map[Ref]Hash, error) { - const errPrefix = "git show-ref" - out, err := g.output(ctx, errPrefix, []string{"show-ref", "--head"}) - if err != nil { - return nil, err +// RefIterator is an open handle to a Git subprocess that lists refs. +// Closing the iterator stops the subprocess. +type RefIterator struct { + scanner *bufio.Scanner + stderr *bytes.Buffer + cancelFunc context.CancelFunc + closer io.Closer + errPrefix string + ignoreDerefs bool + ignoreHead bool + ignoreExit1 bool + + scanErr error + scanDone bool + hasResults bool + ref Ref + hash githash.SHA1 + deref bool +} + +// Next attempts to scan the next ref and reports whether one exists. +func (iter *RefIterator) Next() bool { + if iter.scanDone { + return false } - refs, err := parseRefs(out, true) + err := iter.next() if err != nil { - return refs, fmt.Errorf("%s: %w", errPrefix, err) + iter.cancel() + iter.scanErr = err + if errors.Is(err, io.EOF) { + iter.scanErr = nil + } + return false } - return refs, nil + iter.hasResults = true + return true } -func parseRefs(out string, ignoreDerefs bool) (map[Ref]Hash, error) { - refs := make(map[Ref]Hash) - tags := make(map[Ref]bool) +func (iter *RefIterator) next() error { isSpace := func(c rune) bool { return c == ' ' || c == '\t' } - for len(out) > 0 { - eol := strings.IndexByte(out, '\n') - if eol == -1 { - return refs, errors.New("parse refs: unexpected EOF") - } - line := out[:eol] - out = out[eol+1:] - sp := strings.IndexFunc(line, isSpace) + for iter.scanner.Scan() { + line := iter.scanner.Bytes() + sp := bytes.IndexAny(line, " \t") if sp == -1 { - return refs, fmt.Errorf("parse refs: could not parse line %q", line) + return fmt.Errorf("parse refs: could not parse line %q", line) } - h, err := ParseHash(line[:sp]) - if err != nil { - return refs, fmt.Errorf("parse refs: hash of ref %q: %w", line[sp+1:], err) + refBytes := 
bytes.TrimLeftFunc(line[sp+1:], isSpace) + const derefSuffix = "^{}" + iter.deref = len(refBytes) >= len(derefSuffix) && + string(refBytes[len(refBytes)-len(derefSuffix):]) == derefSuffix + if iter.deref { + refBytes = refBytes[:len(refBytes)-len(derefSuffix)] } - ref := Ref(strings.TrimLeftFunc(line[sp+1:], isSpace)) - if strings.HasSuffix(string(ref), "^{}") { - // Dereferenced tag. This takes precedence over the previous hash stored in the map. - if ignoreDerefs { - continue - } - ref = ref[:len(ref)-3] - if tags[ref] { - return refs, fmt.Errorf("parse refs: multiple hashes found for tag %v", ref) + ref := Ref(refBytes) + if (!iter.deref || !iter.ignoreDerefs) && (ref != Head || !iter.ignoreHead) { + if err := iter.hash.UnmarshalText(line[:sp]); err != nil { + return fmt.Errorf("parse refs: hash of ref %q: %w", line[sp+1:], err) } - tags[ref] = true - } else if _, exists := refs[ref]; exists { - return refs, fmt.Errorf("parse refs: multiple hashes found for %v", ref) + iter.ref = ref + return nil } - refs[ref] = h } - return refs, nil + + if err := iter.scanner.Err(); err != nil { + return err + } + // Reached successful end. Wait for subprocess to exit. + if err := iter.close(); err != nil { + return err + } + return io.EOF +} + +// Ref returns the current ref. +// [RefIterator.Next] must be called at least once before calling Ref. +func (iter *RefIterator) Ref() Ref { + return iter.ref +} + +// ObjectSHA1 returns the SHA-1 hash of the Git object +// the current ref refers to. +// [RefIterator.Next] must be called at least once before calling ObjectSHA1. +func (iter *RefIterator) ObjectSHA1() githash.SHA1 { + return iter.hash +} + +// IsDereference reports whether the value of [RefIterator.ObjectSHA1] +// represents the target of a tag object. +func (iter *RefIterator) IsDereference() bool { + return iter.deref +} + +// Close ends the Git subprocess and waits for it to finish. 
+// Close returns an error if [RefIterator.Next] returned false +// due to a parse failure. +// Subsequent calls to Close will no-op and return the same error. +func (iter *RefIterator) Close() error { + iter.cancel() + iter.close() // Ignore error, since it's from interrupting. + return iter.scanErr +} + +func (iter *RefIterator) cancel() { + if iter.cancelFunc != nil { + iter.cancelFunc() + } + iter.scanner = nil + iter.scanDone = true + iter.ref = "" + iter.hash = Hash{} + iter.deref = false +} + +func (iter *RefIterator) close() error { + if iter.closer == nil { + return nil + } + err := iter.closer.Close() + iter.closer = nil + if err != nil && !(iter.ignoreExit1 && !iter.hasResults && exitCode(err) == 1) { + return commandError(iter.errPrefix, err, iter.stderr.Bytes()) + } + return nil } // A RefMutation describes an operation to perform on a ref. The zero value is