Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support symlinks when constructing a Merkle tree. #229

Merged
merged 14 commits into from
Nov 18, 2020
1 change: 1 addition & 0 deletions go/pkg/client/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ go_test(
"exec_test.go",
"retries_test.go",
"tree_test.go",
"tree_whitebox_test.go",
],
embed = [":go_default_library"],
deps = [
Expand Down
8 changes: 7 additions & 1 deletion go/pkg/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ type Client struct {
// UtilizeLocality is to specify whether client downloads files utilizing disk access locality.
UtilizeLocality UtilizeLocality
// UnifiedCASOps specifies whether the client uploads/downloads files in the background
UnifiedCASOps UnifiedCASOps
UnifiedCASOps UnifiedCASOps
// TreeSymlinkOpts controls how symlinks are handled when constructing a tree.
TreeSymlinkOpts *TreeSymlinkOpts
serverCaps *repb.ServerCapabilities
useBatchOps UseBatchOps
casConcurrency int64
Expand Down Expand Up @@ -170,6 +172,10 @@ func (s UnifiedCASOps) Apply(c *Client) {
c.UnifiedCASOps = s
}

func (o *TreeSymlinkOpts) Apply(c *Client) {
c.TreeSymlinkOpts = o
}

// MaxBatchDigests is maximum amount of digests to batch in batched operations.
type MaxBatchDigests int

Expand Down
126 changes: 111 additions & 15 deletions go/pkg/client/tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,24 @@ import (
// tree later. It corresponds roughly to a *repb.Directory, but with pointers, not digests, used to
// refer to other nodes.
type treeNode struct {
files map[string]*fileNode
dirs map[string]*treeNode
files map[string]*fileNode
dirs map[string]*treeNode
symlinks map[string]*symlinkNode
}

type fileNode struct {
chunker *chunker.Chunker
isExecutable bool
}

type symlinkNode struct {
target string
}

type fileSysNode struct {
file *fileNode
emptyDirectoryMarker bool
symlink *symlinkNode
}

// TreeStats contains various stats/metadata of the constructed Merkle tree.
Expand All @@ -45,11 +51,29 @@ type TreeStats struct {
InputFiles int
// The total number of input directories.
InputDirectories int
// The total number of input symlinks
InputSymlinks int
// The overall number of bytes from all the inputs.
TotalInputBytes int64
// TODO(olaola): number of FileMetadata cache hits/misses go here.
}

// TreeSymlinkOpts controls how symlinks are handled when constructing a tree.
type TreeSymlinkOpts struct {
// By default, a symlink is converted into its targeted file.
// If true, preserve the symlink.
Preserved bool
// If true, the symlink target (if not dangling) is followed.
FollowsTarget bool
}

// DefaultTreeSymlinkOpts returns a default DefaultTreeSymlinkOpts object.
func DefaultTreeSymlinkOpts() *TreeSymlinkOpts {
return &TreeSymlinkOpts{
FollowsTarget: true,
}
}

// shouldIgnore returns whether a given input should be excluded based on the given InputExclusions,
func shouldIgnore(inp string, t command.InputType, excl []*command.InputExclusion) bool {
for _, r := range excl {
Expand All @@ -63,23 +87,66 @@ func shouldIgnore(inp string, t command.InputType, excl []*command.InputExclusio
return false
}

// getTargetRelPath returns the part of target that is relative to execRoot,
ola-rozenfeld marked this conversation as resolved.
Show resolved Hide resolved
// iff target is under execRoot. Otherwise it returns an error.
func getTargetRelPath(execRoot, target string) (string, error) {
if !filepath.IsAbs(target) {
target = filepath.Clean(filepath.Join(execRoot, target))
}
return getRelPath(execRoot, target)
}

// preprocessSymlink returns two things: if the routine should continue, and if
// there is an error to be reported back.
func preprocessSymlink(execRoot, symlink string, meta *filemetadata.SymlinkMetadata, opts *TreeSymlinkOpts) (bool, error) {
if meta.IsDangling {
// For now, we do not treat a dangling symlink as an error. In the case
// where the symlink is not preserved (i.e. needs to be converted to a
// file), we simply ignore this path in the finalized tree.
return opts.Preserved, nil
}
if !opts.Preserved {
// We will convert the symlink to a normal file, so it doesn't matter
// where target is under execRoot or not.
return true, nil
}

if _, err := getTargetRelPath(execRoot, meta.Target); err != nil {
return false, err
}
return true, nil
}

// loadFiles reads all files specified by the given InputSpec (descending into subdirectories
// recursively), and loads their contents into the provided map.
func loadFiles(execRoot string, excl []*command.InputExclusion, path string, fs map[string]*fileSysNode, chunkSize int, cache filemetadata.Cache) error {
func loadFiles(execRoot string, excl []*command.InputExclusion, path string, fs map[string]*fileSysNode, chunkSize int, cache filemetadata.Cache, opts *TreeSymlinkOpts) error {
if opts == nil {
opts = DefaultTreeSymlinkOpts()
}
absPath := filepath.Clean(filepath.Join(execRoot, path))
normPath, err := getRelPath(execRoot, absPath)
if err != nil {
return err
}
meta := cache.Get(absPath)
t := command.FileInputType
if smd := meta.Symlink; smd != nil && smd.IsDangling {
return nil
}
if meta.Err != nil {
isSymlink := meta.Symlink != nil
if isSymlink {
cont, err := preprocessSymlink(execRoot, path, meta.Symlink, opts)
if err != nil {
return err
}
if !cont {
return nil
ola-rozenfeld marked this conversation as resolved.
Show resolved Hide resolved
}
} else if meta.Err != nil {
return meta.Err
}
if meta.IsDirectory {
t := command.FileInputType
if isSymlink && opts.Preserved {
// An implication of this is that, if a path is a symlink to a
// directory, then the symlink attribute takes precedence.
t = command.SymlinkInputType
} else if meta.IsDirectory {
t = command.DirectoryInputType
}
if shouldIgnore(absPath, t, excl) {
Expand All @@ -93,6 +160,22 @@ func loadFiles(execRoot string, excl []*command.InputExclusion, path string, fs
},
}
return nil
} else if t == command.SymlinkInputType {
relTarget, err := getTargetRelPath(execRoot, meta.Symlink.Target)
if err != nil {
return err
}
fs[normPath] = &fileSysNode{
// We cannot directly use meta.Symlink.Target, because it could be
// an absolute path. Since the remote worker will map the exec root
// to a different directory, we must strip away the local exec root.
// See https://github.com/bazelbuild/remote-apis-sdks/pull/229#discussion_r524830458
symlink: &symlinkNode{target: relTarget},
}
if meta.Symlink.IsDangling || !opts.FollowsTarget {
return nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, one thing I wanted to check: is it possible to create dangling symlinks on Windows? I seem to recall there were some problems with that.

Copy link
Contributor Author

@k-ye k-ye Nov 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, i don't have a Win at this moment to test this. But i think the test case has been creating dangling symlinks for quite a while. Have you noticed that it has failed on Win?

if err := os.Symlink(target, path); err != nil {
return err
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, you're right -- I guess that does work and I misremembered. Thank you!

}
return loadFiles(execRoot, excl, relTarget, fs, chunkSize, cache, opts)
ola-rozenfeld marked this conversation as resolved.
Show resolved Hide resolved
}
// Directory
files, err := ioutil.ReadDir(absPath)
Expand All @@ -105,7 +188,7 @@ func loadFiles(execRoot string, excl []*command.InputExclusion, path string, fs
return nil
}
for _, f := range files {
if e := loadFiles(execRoot, excl, filepath.Join(normPath, f.Name()), fs, chunkSize, cache); e != nil {
if e := loadFiles(execRoot, excl, filepath.Join(normPath, f.Name()), fs, chunkSize, cache, opts); e != nil {
return e
}
}
Expand Down Expand Up @@ -141,7 +224,7 @@ func (c *Client) ComputeMerkleTree(execRoot string, is *command.InputSpec, chunk
if i == "" {
return digest.Empty, nil, nil, errors.New("empty Input, use \".\" for entire exec root")
}
if e := loadFiles(execRoot, is.InputExclusions, i, fs, chunkSize, cache); e != nil {
if e := loadFiles(execRoot, is.InputExclusions, i, fs, chunkSize, cache, c.TreeSymlinkOpts); e != nil {
return digest.Empty, nil, nil, e
}
}
Expand Down Expand Up @@ -184,10 +267,17 @@ func buildTree(files map[string]*fileSysNode) *treeNode {
node.dirs[base] = &treeNode{}
continue
}
if node.files == nil {
node.files = make(map[string]*fileNode)
if fn.file != nil {
if node.files == nil {
node.files = make(map[string]*fileNode)
}
node.files[base] = fn.file
} else {
if node.symlinks == nil {
node.symlinks = make(map[string]*symlinkNode)
}
node.symlinks[base] = fn.symlink
}
node.files[base] = fn.file
}
return root
}
Expand Down Expand Up @@ -217,6 +307,12 @@ func packageTree(t *treeNode, chunkSize int, stats *TreeStats) (root digest.Dige
}
sort.Slice(dir.Files, func(i, j int) bool { return dir.Files[i].Name < dir.Files[j].Name })

for name, sn := range t.symlinks {
dir.Symlinks = append(dir.Symlinks, &repb.SymlinkNode{Name: name, Target: sn.target})
stats.InputSymlinks++
}
sort.Slice(dir.Symlinks, func(i, j int) bool { return dir.Symlinks[i].Name < dir.Symlinks[j].Name })

ch, err := chunker.NewFromProto(dir, chunkSize)
if err != nil {
return digest.Empty, nil, err
Expand Down Expand Up @@ -387,7 +483,7 @@ func (c *Client) ComputeOutputsToUpload(execRoot string, paths []string, chunkSi
}
// A directory.
fs := make(map[string]*fileSysNode)
if e := loadFiles(absPath, nil, "", fs, chunkSize, cache); e != nil {
if e := loadFiles(absPath, nil, "", fs, chunkSize, cache, c.TreeSymlinkOpts); e != nil {
return nil, nil, e
}
ft := buildTree(fs)
Expand Down
Loading