Skip to content

Commit

Permalink
[RB] Finish refactoring out go-git from CLI (#7989)
Browse files Browse the repository at this point in the history
Our usage of go-git in the remote bazel CLI doesn't handle a series of
edge cases and has resulted in some performance problems. This PR is a
continuation of #7933

Also:
* Fixes an assumption that the default branch had to be named `main` or
`master`
* Tries to minimize slow remote git calls by making one `git ls-remote
--symref origin` call at the beginning, and passing the output to helper
function
* Fixes a bug where the flags weren't parsed when we were trying to use
them

Fixes #7896
Fixes buildbuddy-io/buildbuddy-internal#3717
  • Loading branch information
maggie-lou authored Dec 4, 2024
1 parent 8c219d2 commit 70e2531
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 96 deletions.
2 changes: 0 additions & 2 deletions cli/remotebazel/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ go_library(
"//server/util/shlex",
"//server/util/status",
"@com_github_alecaivazis_survey_v2//:survey",
"@com_github_go_git_go_git_v5//:go-git",
"@com_github_go_git_go_git_v5//plumbing",
"@org_golang_google_genproto_googleapis_bytestream//:bytestream",
"@org_golang_google_grpc//metadata",
"@org_golang_x_sync//errgroup",
Expand Down
177 changes: 91 additions & 86 deletions cli/remotebazel/remotebazel.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
Expand Down Expand Up @@ -37,8 +36,6 @@ import (
"github.com/buildbuddy-io/buildbuddy/server/util/rexec"
"github.com/buildbuddy-io/buildbuddy/server/util/shlex"
"github.com/buildbuddy-io/buildbuddy/server/util/status"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"golang.org/x/sync/errgroup"
"golang.org/x/sys/unix"
"google.golang.org/grpc/metadata"
Expand Down Expand Up @@ -89,8 +86,6 @@ var (
runFromBranch = RemoteFlagset.String("run_from_branch", "", "A GitHub branch to base the remote run off. If unset, the remote workspace will mirror your local workspace.")
runFromCommit = RemoteFlagset.String("run_from_commit", "", "A GitHub commit SHA to base the remote run off. If unset, the remote workspace will mirror your local workspace.")
script = RemoteFlagset.String("script", "", "Shell code to run remotely instead of a Bazel command.")

defaultBranchRefs = []string{"refs/heads/main", "refs/heads/master"}
)

func consoleCursorMoveUp(y int) {
Expand Down Expand Up @@ -129,7 +124,6 @@ type gitRemote struct {
}

type RepoConfig struct {
Root string
URL string
Ref string
CommitSHA string
Expand Down Expand Up @@ -235,28 +229,19 @@ func parseRemote(s string) (*gitRemote, error) {

}

func determineDefaultBranch(repo *git.Repository) (string, error) {
branches, err := repo.Branches()
if err != nil {
return "", status.UnknownErrorf("could not list branches: %s", err)
}

allBranches := make(map[string]struct{})
err = branches.ForEach(func(branch *plumbing.Reference) error {
allBranches[string(branch.Name())] = struct{}{}
return nil
})
if err != nil {
return "", status.UnknownErrorf("could not iterate over branches: %s", err)
}

for _, defaultBranch := range defaultBranchRefs {
if _, ok := allBranches[defaultBranch]; ok {
return defaultBranch, nil
}
}

return "", status.NotFoundErrorf("could not determine default branch")
// determineDefaultBranch parses `remoteData` (the output from `git ls-remote --symref origin`)
// and returns the HEAD branch for the repo (often `main` or `master).
//
// We expect `remoteData` to contain a string looking like
// `ref: refs/heads/main HEAD`
// and this function would return `main`.
func determineDefaultBranch(remoteData string) (string, error) {
re := regexp.MustCompile(`ref: refs/heads/(\S+)\s+HEAD`)
match := re.FindStringSubmatch(remoteData)
if len(match) > 1 {
return match[1], nil
}
return "", status.NotFoundErrorf("Failed to parse default branch from:\n%s", remoteData)
}

func runGit(args ...string) (string, error) {
Expand All @@ -274,10 +259,7 @@ func runCommand(name string, args ...string) (string, error) {
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
if _, ok := err.(*exec.ExitError); ok {
return stdout.String(), err
}
return stdout.String(), status.UnknownErrorf("error running %s %s: %s\n%s", name, args, err, stderr.String())
return stdout.String(), status.UnknownErrorf("error running %s %s: %s\n%s", name, strings.Join(args, " "), err, stderr.String())
}
return stdout.String(), nil
}
Expand All @@ -303,45 +285,40 @@ func diffUntrackedFile(path string) (string, error) {
}
patch, err := runGit(args...)
if err != nil {
if _, ok := err.(*exec.ExitError); ok {
return patch, nil
// `git diff` returns exit code 1 if there is (valid) diff. Explicitly
// check for this case.
if !strings.Contains(patch, "diff --git") {
return "", err
}
return "", err
}

return patch, nil
}

func Config(path string) (*RepoConfig, error) {
repo, err := git.PlainOpenWithOptions(path, &git.PlainOpenOptions{DetectDotGit: true})
if err != nil {
return nil, status.WrapError(err, "open git repo")
}

func Config() (*RepoConfig, error) {
remote, err := determineRemote()
if err != nil {
return nil, status.WrapError(err, "determine remote")
}
fetchURL := remote.url
log.Debugf("Using fetch URL: %s", fetchURL)

branch, commit, err := getBaseBranchAndCommit(repo)
remoteData, err := runGit("ls-remote", "--symref", remote.name)
if err != nil {
return nil, status.WrapError(err, "get base branch and commit")
return nil, status.WrapErrorf(err, "git remote show %s", remote.name)
}

defaultBranch, err := determineDefaultBranch(repo)
branch, commit, err := getBaseBranchAndCommit(remoteData)
if err != nil {
log.Warnf("Failed to fetch default branch: %s", err)
return nil, status.WrapError(err, "get base branch and commit")
}

wt, err := repo.Worktree()
defaultBranch, err := determineDefaultBranch(remoteData)
if err != nil {
return nil, status.UnknownErrorf("could not determine git repo root")
log.Warnf("Failed to fetch default branch: %s", err)
}

repoConfig := &RepoConfig{
Root: wt.Filesystem.Root(),
URL: fetchURL,
CommitSHA: commit,
Ref: branch,
Expand All @@ -361,36 +338,21 @@ func Config(path string) (*RepoConfig, error) {

// getBaseBranchAndCommit returns the git branch and commit that the remote run
// should be based off
func getBaseBranchAndCommit(repo *git.Repository) (branch string, commit string, err error) {
//
// remoteData is the output from `git remote show origin`
func getBaseBranchAndCommit(remoteData string) (branch string, commit string, err error) {
branch = *runFromBranch
commit = *runFromCommit
if branch != "" || commit != "" {
return branch, commit, nil
}

head, err := repo.Head()
currentBranch, err := getCurrentRef()
if err != nil {
return "", "", status.WrapError(err, "get repo head")
return "", "", err
}

currentBranch := head.Name().Short()
if !head.Name().IsBranch() {
// Handle detached head state
detachedHeadOutput, _ := runGit("branch")
regex := regexp.MustCompile(".*detached at ([^)]+).*")
matches := regex.FindStringSubmatch(detachedHeadOutput)
if len(matches) != 2 {
return "", "", status.UnknownErrorf("unexpected branch state %s", detachedHeadOutput)
}
currentBranch = matches[1]
}

remoteBranchOutput, err := runGit("ls-remote", "origin", currentBranch)
if err != nil {
return "", "", status.WrapError(err, fmt.Sprintf("check if branch %s exists remotely", currentBranch))
}
currentBranchExistsRemotely := remoteBranchOutput != ""

currentBranchExistsRemotely := branchExistsRemotely(remoteData, currentBranch)
if currentBranchExistsRemotely {
branch = currentBranch

Expand All @@ -408,31 +370,26 @@ func getBaseBranchAndCommit(repo *git.Repository) (branch string, commit string,
if currentCommitExistsRemotely {
commit = currentCommitHash
} else {
// Get the head commit of the remote branch
remoteHeadOutput, err := runGit("ls-remote", "--heads", "origin", branch)
remoteHeadCommit, err := getHeadCommitForRemoteBranch(remoteData, branch)
if err != nil {
return "", "", status.WrapError(err, fmt.Sprintf("get remote head of %s", branch))
return "", "", err
}
remoteHeadParsed := strings.Fields(remoteHeadOutput)
if len(remoteHeadParsed) < 1 {
return "", "", errors.New("unexpected remote head output: " + remoteHeadOutput)
}
commit = remoteHeadParsed[0]
commit = remoteHeadCommit
}
} else {
// If the current branch does not exist remotely, the remote runner will
// not be able to fetch it. In this case, use the default branch for the repo
defaultBranch, err := determineDefaultBranch(repo)
defaultBranch, err := determineDefaultBranch(remoteData)
if err != nil {
return "", "", status.WrapError(err, "get default branch")
}
branch = defaultBranch

defaultBranchCommitHash, err := repo.ResolveRevision(plumbing.Revision(defaultBranch))
defaultBranchCommitHash, err := getHeadCommitForRemoteBranch(remoteData, defaultBranch)
if err != nil {
return "", "", status.WrapError(err, "get default branch commit hash")
}
commit = defaultBranchCommitHash.String()
commit = defaultBranchCommitHash
}

log.Debugf("Using base branch: %s", branch)
Expand All @@ -441,6 +398,54 @@ func getBaseBranchAndCommit(repo *git.Repository) (branch string, commit string,
return branch, commit, nil
}

// getCurrentRef returns the current branch, or the current commit if in a detached
// HEAD state
func getCurrentRef() (string, error) {
currentBranch, err := runGit("symbolic-ref", "--short", "HEAD")
if err == nil {
return strings.TrimSpace(currentBranch), nil
} else if !strings.Contains(err.Error(), "ref HEAD is not a symbolic ref") {
return "", status.WrapError(err, "get current branch")
}

// Handle detached head state
detachedHeadOutput, _ := runGit("branch")
regex := regexp.MustCompile(".*detached at ([^)]+).*")
matches := regex.FindStringSubmatch(detachedHeadOutput)
if len(matches) != 2 {
return "", status.UnknownErrorf("unexpected branch state %s", detachedHeadOutput)
}
return strings.TrimSpace(matches[1]), nil
}

// branchExistsRemotely parses `remoteData` (the output from “git ls-remote --symref origin)
// and returns whether `branch` is tracked remotely.
//
// If the branch is tracked remotely, we expect `remoteData` to contain a string looking like
// `abc123 refs/heads/my_branch`
func branchExistsRemotely(remoteData string, branch string) bool {
regex := fmt.Sprintf("\\brefs/heads/%s\\b", branch)
re := regexp.MustCompile(regex)
return re.MatchString(remoteData)
}

// getHeadCommitForRemoteBranch parses `remoteData` (the output from “git ls-remote --symref origin)
// and returns the commit at HEAD for the remote branch.
//
// We expect `remoteData` to contain a string looking like
//
// `abc123 refs/heads/my_branch`
// and this function would return `abc123`.
func getHeadCommitForRemoteBranch(remoteData string, branch string) (string, error) {
regex := `\n(\S+)\s+refs/heads/` + branch + `\n`
re := regexp.MustCompile(regex)
match := re.FindStringSubmatch(remoteData)
if len(match) > 1 {
return match[1], nil
}
return "", status.NotFoundErrorf("Failed to get HEAD commit for branch %s from:\n%s", branch, remoteData)
}

// generates diffs between the current state of the repo and `baseCommit`
func generatePatches(baseCommit string) ([][]byte, error) {
modifiedFiles, err := runGit("diff", baseCommit, "--name-only")
Expand Down Expand Up @@ -995,6 +1000,11 @@ func Run(ctx context.Context, opts RunOpts, repoConfig *RepoConfig) (int, error)
}

func HandleRemoteBazel(commandLineArgs []string) (int, error) {
commandLineArgs, err := parseRemoteCliFlags(commandLineArgs)
if err != nil {
return 1, status.WrapError(err, "parse cli flags")
}

tempDir, err := os.MkdirTemp("", "buildbuddy-cli-*")
if err != nil {
return 1, err
Expand All @@ -1004,7 +1014,7 @@ func HandleRemoteBazel(commandLineArgs []string) (int, error) {
}()

ctx := context.Background()
repoConfig, err := Config(".")
repoConfig, err := Config()
if err != nil {
return 1, status.WrapError(err, "remote config")
}
Expand All @@ -1014,11 +1024,6 @@ func HandleRemoteBazel(commandLineArgs []string) (int, error) {
return 1, status.WrapError(err, "finding workspace")
}

commandLineArgs, err = parseRemoteCliFlags(commandLineArgs)
if err != nil {
return 1, status.WrapError(err, "parse cli flags")
}

runner := *remoteRunner
if !strings.HasPrefix(runner, "grpc") {
runner = "grpcs://" + runner
Expand Down
15 changes: 8 additions & 7 deletions cli/remotebazel/remotebazel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ func TestGitConfig_BranchAndSha(t *testing.T) {
name: "Local branch does not exist remotely",
localBranchExistsRemotely: false,
localCommitExistsRemotely: false,
expectedBranch: "refs/heads/master",
expectedBranch: "master",
expectedCommit: remoteMasterHeadCommit,
expectedPatches: []string{"local_file.txt"},
},
Expand Down Expand Up @@ -272,7 +272,7 @@ func TestGitConfig_BranchAndSha(t *testing.T) {
testgit.CommitFiles(t, localRepoPath, map[string]string{"local_file.txt": "exit 0"})
}

config, err := Config(localRepoPath)
config, err := Config()
require.NoError(t, err, tc.name)

require.Equal(t, tc.expectedBranch, config.Ref, tc.name)
Expand All @@ -287,6 +287,7 @@ func TestGitConfig_BranchAndSha(t *testing.T) {
func TestGitConfig_FetchURL(t *testing.T) {
// Setup the "remote" repo
remoteRepoPath, _ := testgit.MakeTempRepo(t, map[string]string{"hello.txt": "exit 0"})
remoteUrl := "file://" + remoteRepoPath

testCases := []struct {
name string
Expand All @@ -296,13 +297,13 @@ func TestGitConfig_FetchURL(t *testing.T) {
}{
{
name: "One remote is configured",
expectedURL: "file://" + remoteRepoPath,
expectedURL: remoteUrl,
},
{
name: "Selected remote is cached",
multipleRemotes: true,
isRemoteCached: true,
expectedURL: "fake.git",
expectedURL: remoteUrl,
},
}

Expand All @@ -313,13 +314,13 @@ func TestGitConfig_FetchURL(t *testing.T) {
require.NoError(t, err, tc.name)

if tc.multipleRemotes {
testshell.Run(t, localRepoPath, "git remote add extra fake.git")
testshell.Run(t, localRepoPath, "git remote add extra "+remoteUrl)
}
if tc.isRemoteCached {
testshell.Run(t, localRepoPath, fmt.Sprintf("git config --replace-all %s.%s extra", gitConfigSection, gitConfigRemoteBazelRemote))
}

config, err := Config(localRepoPath)
config, err := Config()
require.NoError(t, err, tc.name)
require.Equal(t, tc.expectedURL, config.URL)
}
Expand Down Expand Up @@ -353,7 +354,7 @@ func TestGeneratingPatches(t *testing.T) {
echo -ne '\x00\x01\x02\x03\x04' > b2.bin
`)

config, err := Config(localRepoPath)
config, err := Config()
require.NoError(t, err)

require.Equal(t, 4, len(config.Patches))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ func TestCancel(t *testing.T) {
require.NoError(t, err)
wsFilePath, err := bazel.FindWorkspaceFile(".")
require.NoError(t, err)
repoConfig, err := remotebazel.Config(".")
repoConfig, err := remotebazel.Config()
require.NoError(t, err)
_, err = remotebazel.Run(
ctxWithCancel,
Expand Down

0 comments on commit 70e2531

Please sign in to comment.