diff --git a/go/pkg/client/BUILD.bazel b/go/pkg/client/BUILD.bazel index 529fc4264..f457bcf1e 100644 --- a/go/pkg/client/BUILD.bazel +++ b/go/pkg/client/BUILD.bazel @@ -52,6 +52,7 @@ go_test( "exec_test.go", "retries_test.go", "tree_test.go", + "tree_whitebox_test.go", ], embed = [":go_default_library"], deps = [ diff --git a/go/pkg/client/client.go b/go/pkg/client/client.go index 15c8ef831..a9ea39574 100644 --- a/go/pkg/client/client.go +++ b/go/pkg/client/client.go @@ -85,7 +85,9 @@ type Client struct { // UtilizeLocality is to specify whether client downloads files utilizing disk access locality. UtilizeLocality UtilizeLocality // UnifiedCASOps specifies whether the client uploads/downloads files in the background - UnifiedCASOps UnifiedCASOps + UnifiedCASOps UnifiedCASOps + // TreeSymlinkOpts controls how symlinks are handled when constructing a tree. + TreeSymlinkOpts *TreeSymlinkOpts serverCaps *repb.ServerCapabilities useBatchOps UseBatchOps casConcurrency int64 @@ -170,6 +172,10 @@ func (s UnifiedCASOps) Apply(c *Client) { c.UnifiedCASOps = s } +func (o *TreeSymlinkOpts) Apply(c *Client) { + c.TreeSymlinkOpts = o +} + // MaxBatchDigests is maximum amount of digests to batch in batched operations. type MaxBatchDigests int diff --git a/go/pkg/client/tree.go b/go/pkg/client/tree.go index b93fe2899..e34c37f5a 100644 --- a/go/pkg/client/tree.go +++ b/go/pkg/client/tree.go @@ -22,8 +22,9 @@ import ( // tree later. It corresponds roughly to a *repb.Directory, but with pointers, not digests, used to // refer to other nodes. type treeNode struct { - files map[string]*fileNode - dirs map[string]*treeNode + files map[string]*fileNode + dirs map[string]*treeNode + symlinks map[string]*symlinkNode } type fileNode struct { @@ -31,9 +32,14 @@ type fileNode struct { isExecutable bool } +type symlinkNode struct { + target string +} + type fileSysNode struct { file *fileNode emptyDirectoryMarker bool + symlink *symlinkNode } // TreeStats contains various stats/metadata of the constructed Merkle tree. @@ -45,11 +51,29 @@ type TreeStats struct { InputFiles int // The total number of input directories. InputDirectories int + // The total number of input symlinks + InputSymlinks int // The overall number of bytes from all the inputs. TotalInputBytes int64 // TODO(olaola): number of FileMetadata cache hits/misses go here. } +// TreeSymlinkOpts controls how symlinks are handled when constructing a tree. +type TreeSymlinkOpts struct { + // By default, a symlink is converted into its targeted file. + // If true, preserve the symlink. + Preserved bool + // If true, the symlink target (if not dangling) is followed. + FollowsTarget bool +} + +// DefaultTreeSymlinkOpts returns a default DefaultTreeSymlinkOpts object. +func DefaultTreeSymlinkOpts() *TreeSymlinkOpts { + return &TreeSymlinkOpts{ + FollowsTarget: true, + } +} + // shouldIgnore returns whether a given input should be excluded based on the given InputExclusions, func shouldIgnore(inp string, t command.InputType, excl []*command.InputExclusion) bool { for _, r := range excl { @@ -63,23 +87,66 @@ func shouldIgnore(inp string, t command.InputType, excl []*command.InputExclusio return false } +// getTargetRelPath returns the part of target that is relative to execRoot, +// iff target is under execRoot. Otherwise it returns an error. +func getTargetRelPath(execRoot, target string) (string, error) { + if !filepath.IsAbs(target) { + target = filepath.Clean(filepath.Join(execRoot, target)) + } + return getRelPath(execRoot, target) +} + +// preprocessSymlink returns two things: if the routine should continue, and if +// there is an error to be reported back. +func preprocessSymlink(execRoot, symlink string, meta *filemetadata.SymlinkMetadata, opts *TreeSymlinkOpts) (bool, error) { + if meta.IsDangling { + // For now, we do not treat a dangling symlink as an error. In the case + // where the symlink is not preserved (i.e. needs to be converted to a + // file), we simply ignore this path in the finalized tree. + return opts.Preserved, nil + } + if !opts.Preserved { + // We will convert the symlink to a normal file, so it doesn't matter + // where target is under execRoot or not. + return true, nil + } + + if _, err := getTargetRelPath(execRoot, meta.Target); err != nil { + return false, err + } + return true, nil +} + // loadFiles reads all files specified by the given InputSpec (descending into subdirectories // recursively), and loads their contents into the provided map. -func loadFiles(execRoot string, excl []*command.InputExclusion, path string, fs map[string]*fileSysNode, chunkSize int, cache filemetadata.Cache) error { +func loadFiles(execRoot string, excl []*command.InputExclusion, path string, fs map[string]*fileSysNode, chunkSize int, cache filemetadata.Cache, opts *TreeSymlinkOpts) error { + if opts == nil { + opts = DefaultTreeSymlinkOpts() + } absPath := filepath.Clean(filepath.Join(execRoot, path)) normPath, err := getRelPath(execRoot, absPath) if err != nil { return err } meta := cache.Get(absPath) - t := command.FileInputType - if smd := meta.Symlink; smd != nil && smd.IsDangling { - return nil - } - if meta.Err != nil { + isSymlink := meta.Symlink != nil + if isSymlink { + cont, err := preprocessSymlink(execRoot, path, meta.Symlink, opts) + if err != nil { + return err + } + if !cont { + return nil + } + } else if meta.Err != nil { return meta.Err } - if meta.IsDirectory { + t := command.FileInputType + if isSymlink && opts.Preserved { + // An implication of this is that, if a path is a symlink to a + // directory, then the symlink attribute takes precedence. + t = command.SymlinkInputType + } else if meta.IsDirectory { t = command.DirectoryInputType } if shouldIgnore(absPath, t, excl) { @@ -93,6 +160,22 @@ func loadFiles(execRoot string, excl []*command.InputExclusion, path string, fs }, } return nil + } else if t == command.SymlinkInputType { + relTarget, err := getTargetRelPath(execRoot, meta.Symlink.Target) + if err != nil { + return err + } + fs[normPath] = &fileSysNode{ + // We cannot directly use meta.Symlink.Target, because it could be + // an absolute path. Since the remote worker will map the exec root + // to a different directory, we must strip away the local exec root. + // See https://github.com/bazelbuild/remote-apis-sdks/pull/229#discussion_r524830458 + symlink: &symlinkNode{target: relTarget}, + } + if meta.Symlink.IsDangling || !opts.FollowsTarget { + return nil + } + return loadFiles(execRoot, excl, relTarget, fs, chunkSize, cache, opts) } // Directory files, err := ioutil.ReadDir(absPath) @@ -105,7 +188,7 @@ func loadFiles(execRoot string, excl []*command.InputExclusion, path string, fs return nil } for _, f := range files { - if e := loadFiles(execRoot, excl, filepath.Join(normPath, f.Name()), fs, chunkSize, cache); e != nil { + if e := loadFiles(execRoot, excl, filepath.Join(normPath, f.Name()), fs, chunkSize, cache, opts); e != nil { return e } } @@ -141,7 +224,7 @@ func (c *Client) ComputeMerkleTree(execRoot string, is *command.InputSpec, chunk if i == "" { return digest.Empty, nil, nil, errors.New("empty Input, use \".\" for entire exec root") } - if e := loadFiles(execRoot, is.InputExclusions, i, fs, chunkSize, cache); e != nil { + if e := loadFiles(execRoot, is.InputExclusions, i, fs, chunkSize, cache, c.TreeSymlinkOpts); e != nil { return digest.Empty, nil, nil, e } } @@ -184,10 +267,17 @@ func buildTree(files map[string]*fileSysNode) *treeNode { node.dirs[base] = &treeNode{} continue } - if node.files == nil { - node.files = make(map[string]*fileNode) + if fn.file != nil { + if node.files == nil { + node.files = make(map[string]*fileNode) + } + node.files[base] = fn.file + } else { + if node.symlinks == nil { + node.symlinks = make(map[string]*symlinkNode) + } + node.symlinks[base] = fn.symlink } - node.files[base] = fn.file } return root } @@ -217,6 +307,12 @@ func packageTree(t *treeNode, chunkSize int, stats *TreeStats) (root digest.Dige } sort.Slice(dir.Files, func(i, j int) bool { return dir.Files[i].Name < dir.Files[j].Name }) + for name, sn := range t.symlinks { + dir.Symlinks = append(dir.Symlinks, &repb.SymlinkNode{Name: name, Target: sn.target}) + stats.InputSymlinks++ + } + sort.Slice(dir.Symlinks, func(i, j int) bool { return dir.Symlinks[i].Name < dir.Symlinks[j].Name }) + ch, err := chunker.NewFromProto(dir, chunkSize) if err != nil { return digest.Empty, nil, err @@ -387,7 +483,7 @@ func (c *Client) ComputeOutputsToUpload(execRoot string, paths []string, chunkSi } // A directory. fs := make(map[string]*fileSysNode) - if e := loadFiles(absPath, nil, "", fs, chunkSize, cache); e != nil { + if e := loadFiles(absPath, nil, "", fs, chunkSize, cache, c.TreeSymlinkOpts); e != nil { return nil, nil, e } ft := buildTree(fs) diff --git a/go/pkg/client/tree_test.go b/go/pkg/client/tree_test.go index 985814c6a..24689974e 100644 --- a/go/pkg/client/tree_test.go +++ b/go/pkg/client/tree_test.go @@ -33,7 +33,7 @@ var ( fooDirBlob, barDirBlob, foobarDirBlob = mustMarshal(fooDir), mustMarshal(barDir), mustMarshal(foobarDir) fooDirDg, barDirDg, foobarDirDg = digest.NewFromBlob(fooDirBlob), digest.NewFromBlob(barDirBlob), digest.NewFromBlob(foobarDirBlob) - fooDirDgPb, barDirDgPb = fooDirDg.ToProto(), barDirDg.ToProto() + fooDirDgPb, barDirDgPb, foobarDirDgPb = fooDirDg.ToProto(), barDirDg.ToProto(), foobarDirDg.ToProto() ) func mustMarshal(p proto.Message) []byte { @@ -357,6 +357,7 @@ func TestComputeMerkleTree(t *testing.T) { wantCacheCalls map[string]int // The expected wantStats.TotalInputBytes is calculated by adding the marshalled rootDir size. wantStats *client.TreeStats + treeOpts *client.TreeSymlinkOpts }{ { desc: "Empty directory", @@ -490,6 +491,92 @@ func TestComputeMerkleTree(t *testing.T) { TotalInputBytes: 2*fooDg.Size + fooDirDg.Size, }, }, + { + desc: "File relative symlink (preserved)", + input: []*inputPath{ + {path: "fooDir/foo", fileContents: fooBlob, isExecutable: true}, + {path: "fooSym", isSymlink: true, symlinkTarget: "fooDir/foo"}, + }, + spec: &command.InputSpec{ + // The symlink target will be traversed recursively. + Inputs: []string{"fooSym"}, + }, + rootDir: &repb.Directory{ + Directories: []*repb.DirectoryNode{{Name: "fooDir", Digest: fooDirDgPb}}, + Symlinks: []*repb.SymlinkNode{{Name: "fooSym", Target: "fooDir/foo"}}, + }, + additionalBlobs: [][]byte{fooBlob, fooDirBlob}, + wantCacheCalls: map[string]int{ + "fooDir/foo": 1, + "fooSym": 1, + }, + wantStats: &client.TreeStats{ + InputDirectories: 2, + InputFiles: 1, + InputSymlinks: 1, + TotalInputBytes: fooDg.Size + fooDirDg.Size, + }, + treeOpts: &client.TreeSymlinkOpts{ + Preserved: true, + FollowsTarget: true, + }, + }, + { + desc: "File relative symlink (preserved but not followed)", + input: []*inputPath{ + {path: "fooDir/foo", fileContents: fooBlob, isExecutable: true}, + {path: "fooSym", isSymlink: true, symlinkTarget: "fooDir/foo"}, + }, + spec: &command.InputSpec{ + Inputs: []string{"fooSym"}, + }, + rootDir: &repb.Directory{ + Directories: nil, + Symlinks: []*repb.SymlinkNode{{Name: "fooSym", Target: "fooDir/foo"}}, + }, + wantCacheCalls: map[string]int{ + "fooSym": 1, + }, + wantStats: &client.TreeStats{ + InputDirectories: 1, + InputFiles: 0, + InputSymlinks: 1, + TotalInputBytes: 0, + }, + treeOpts: &client.TreeSymlinkOpts{ + Preserved: true, + }, + }, + { + desc: "File absolute symlink (preserved)", + input: []*inputPath{ + {path: "fooDir/foo", fileContents: fooBlob, isExecutable: true}, + {path: "fooSym", isSymlink: true, isAbsolute: true, symlinkTarget: "fooDir/foo"}, + }, + spec: &command.InputSpec{ + // The symlink target will be traversed recursively. + Inputs: []string{"fooSym"}, + }, + rootDir: &repb.Directory{ + Directories: []*repb.DirectoryNode{{Name: "fooDir", Digest: fooDirDgPb}}, + Symlinks: []*repb.SymlinkNode{{Name: "fooSym", Target: "fooDir/foo"}}, + }, + additionalBlobs: [][]byte{fooBlob, fooDirBlob}, + wantCacheCalls: map[string]int{ + "fooDir/foo": 1, + "fooSym": 1, + }, + wantStats: &client.TreeStats{ + InputDirectories: 2, + InputFiles: 1, + InputSymlinks: 1, + TotalInputBytes: fooDg.Size + fooDirDg.Size, + }, + treeOpts: &client.TreeSymlinkOpts{ + Preserved: true, + FollowsTarget: true, + }, + }, { desc: "File invalid symlink", input: []*inputPath{ @@ -516,6 +603,36 @@ func TestComputeMerkleTree(t *testing.T) { TotalInputBytes: 2*fooDg.Size + fooDirDg.Size, }, }, + { + desc: "Dangling symlink is preserved", + input: []*inputPath{ + {path: "fooDir/foo", fileContents: fooBlob, isExecutable: true}, + {path: "invalidSym", isSymlink: true, symlinkTarget: "fooDir/invalid"}, + }, + spec: &command.InputSpec{ + Inputs: []string{"fooDir", "invalidSym"}, + }, + rootDir: &repb.Directory{ + Directories: []*repb.DirectoryNode{{Name: "fooDir", Digest: fooDirDgPb}}, + Files: nil, + Symlinks: []*repb.SymlinkNode{{Name: "invalidSym", Target: "fooDir/invalid"}}, + }, + additionalBlobs: [][]byte{fooBlob, fooDirBlob}, + wantCacheCalls: map[string]int{ + "fooDir": 1, + "fooDir/foo": 1, + "invalidSym": 1, + }, + wantStats: &client.TreeStats{ + InputDirectories: 2, + InputFiles: 1, + InputSymlinks: 1, + TotalInputBytes: fooDg.Size + fooDirDg.Size, + }, + treeOpts: &client.TreeSymlinkOpts{ + Preserved: true, + }, + }, { desc: "Directory absolute symlink", input: []*inputPath{ @@ -570,6 +687,72 @@ func TestComputeMerkleTree(t *testing.T) { TotalInputBytes: fooDg.Size + fooDirDg.Size + barDg.Size + barDirDg.Size, }, }, + { + desc: "Directory absolute symlink (preserved)", + input: []*inputPath{ + {path: "foobarDir/foo", fileContents: fooBlob, isExecutable: true}, + {path: "foobarDir/bar", fileContents: barBlob}, + {path: "foobarSymDir", isSymlink: true, isAbsolute: true, symlinkTarget: "foobarDir"}, + }, + spec: &command.InputSpec{ + // The symlink target will be traversed recursively. + Inputs: []string{"foobarSymDir"}, + }, + rootDir: &repb.Directory{ + Directories: []*repb.DirectoryNode{{Name: "foobarDir", Digest: foobarDirDgPb}}, + Symlinks: []*repb.SymlinkNode{{Name: "foobarSymDir", Target: "foobarDir"}}, + }, + additionalBlobs: [][]byte{fooBlob, barBlob, foobarDirBlob}, + wantCacheCalls: map[string]int{ + "foobarDir": 1, + "foobarDir/foo": 1, + "foobarDir/bar": 1, + "foobarSymDir": 1, + }, + wantStats: &client.TreeStats{ + InputDirectories: 2, + InputFiles: 2, + InputSymlinks: 1, + TotalInputBytes: fooDg.Size + barDg.Size + foobarDirDg.Size, + }, + treeOpts: &client.TreeSymlinkOpts{ + Preserved: true, + FollowsTarget: true, + }, + }, + { + desc: "Directory relative symlink (preserved)", + input: []*inputPath{ + {path: "foobarDir/foo", fileContents: fooBlob, isExecutable: true}, + {path: "foobarDir/bar", fileContents: barBlob}, + {path: "foobarSymDir", isSymlink: true, symlinkTarget: "foobarDir"}, + }, + spec: &command.InputSpec{ + // The symlink target will be traversed recursively. + Inputs: []string{"foobarSymDir"}, + }, + rootDir: &repb.Directory{ + Directories: []*repb.DirectoryNode{{Name: "foobarDir", Digest: foobarDirDgPb}}, + Symlinks: []*repb.SymlinkNode{{Name: "foobarSymDir", Target: "foobarDir"}}, + }, + additionalBlobs: [][]byte{fooBlob, barBlob, foobarDirBlob}, + wantCacheCalls: map[string]int{ + "foobarDir": 1, + "foobarDir/foo": 1, + "foobarDir/bar": 1, + "foobarSymDir": 1, + }, + wantStats: &client.TreeStats{ + InputDirectories: 2, + InputFiles: 2, + InputSymlinks: 1, + TotalInputBytes: fooDg.Size + barDg.Size + foobarDirDg.Size, + }, + treeOpts: &client.TreeSymlinkOpts{ + Preserved: true, + FollowsTarget: true, + }, + }, { desc: "De-duplicating files", input: []*inputPath{ @@ -582,7 +765,7 @@ func TestComputeMerkleTree(t *testing.T) { }, rootDir: &repb.Directory{Directories: []*repb.DirectoryNode{ {Name: "fooDir", Digest: fooDirDgPb}, - {Name: "foobarDir", Digest: foobarDirDg.ToProto()}, + {Name: "foobarDir", Digest: foobarDirDgPb}, }}, additionalBlobs: [][]byte{fooBlob, barBlob, fooDirBlob, foobarDirBlob}, wantCacheCalls: map[string]int{ @@ -904,10 +1087,11 @@ func TestComputeMerkleTree(t *testing.T) { e, cleanup := fakes.NewTestEnv(t) defer cleanup() + tc.treeOpts.Apply(e.Client.GrpcClient) gotRootDg, inputs, stats, err := e.Client.GrpcClient.ComputeMerkleTree(root, tc.spec, chunker.DefaultChunkSize, cache) if err != nil { - t.Errorf("ComputeMerkleTree(...) = gave error %v, want success", err) + t.Errorf("ComputeMerkleTree(...) = gave error %q, want success", err) } for _, ch := range inputs { blob, err := ch.FullData() @@ -945,9 +1129,10 @@ func TestComputeMerkleTree(t *testing.T) { func TestComputeMerkleTreeErrors(t *testing.T) { tests := []struct { - desc string - input []*inputPath - spec *command.InputSpec + desc string + input []*inputPath + spec *command.InputSpec + treeOpts *client.TreeSymlinkOpts }{ { desc: "empty input", @@ -973,6 +1158,19 @@ func TestComputeMerkleTreeErrors(t *testing.T) { }, spec: &command.InputSpec{Inputs: []string{"a", "dir", "dir/b"}}, }, + { + desc: "Preserved symlink escaping exec root", + input: []*inputPath{ + {path: "../foo", fileContents: fooBlob, isExecutable: true}, + {path: "escapingFoo", isSymlink: true, symlinkTarget: "../foo"}, + }, + spec: &command.InputSpec{ + Inputs: []string{"escapingFoo"}, + }, + treeOpts: &client.TreeSymlinkOpts{ + Preserved: true, + }, + }, } for _, tc := range tests { @@ -987,6 +1185,7 @@ func TestComputeMerkleTreeErrors(t *testing.T) { t.Run(tc.desc, func(t *testing.T) { e, cleanup := fakes.NewTestEnv(t) defer cleanup() + tc.treeOpts.Apply(e.Client.GrpcClient) if _, _, _, err := e.Client.GrpcClient.ComputeMerkleTree(root, tc.spec, chunker.DefaultChunkSize, filemetadata.NewNoopCache()); err == nil { t.Errorf("ComputeMerkleTree(%v) succeeded, want error", tc.spec) diff --git a/go/pkg/client/tree_whitebox_test.go b/go/pkg/client/tree_whitebox_test.go new file mode 100644 index 000000000..c8b8ef265 --- /dev/null +++ b/go/pkg/client/tree_whitebox_test.go @@ -0,0 +1,51 @@ +package client + +import "testing" + +func TestGetTargetRelPath(t *testing.T) { + execRoot := "/execRoot/dir" + tests := []struct { + desc string + target string + wantErr bool + relTarget string + }{ + { + desc: "basic", + target: "foo", + relTarget: "foo", + }, + { + desc: "there and back again", + target: "../dir/sub/foo", + relTarget: "sub/foo", + }, + { + desc: "relative target path escaping exec root", + target: "../foo", + wantErr: true, + }, + { + desc: "absolute target path under exec root", + target: execRoot + "/sub/foo", + relTarget: "sub/foo", + }, + { + desc: "absolute target path escaping exec root", + target: "/another/dir/foo", + wantErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.desc, func(t *testing.T) { + res, err := getTargetRelPath(execRoot, tc.target) + if (err != nil) != tc.wantErr { + t.Errorf("getTargetRelPath(target=%q) error: expected=%v got=%v", tc.target, tc.wantErr, err) + } + if err == nil && res != tc.relTarget { + t.Errorf("getTargetRelPath(target=%q) result: expected=%v got=%v", tc.target, tc.relTarget, res) + } + }) + } +} diff --git a/go/pkg/command/command.go b/go/pkg/command/command.go index b1361ab9b..7a67258fd 100644 --- a/go/pkg/command/command.go +++ b/go/pkg/command/command.go @@ -31,6 +31,9 @@ const ( // FileInputType means only files match. FileInputType + + // SymlinkInputType means only symlink match. + SymlinkInputType ) var inputTypes = [...]string{"UnspecifiedInputType", "DirectoryInputType", "FileInputType"}