From af276594bcb7479b07ba04c1722810650a2c0592 Mon Sep 17 00:00:00 2001 From: Matthias Glastra Date: Mon, 29 Apr 2024 08:58:53 +0200 Subject: [PATCH] feat: Adding functionality for dirhash. Signed-off-by: Matthias Glastra --- attestation/context.go | 39 +++++++++++++++----- attestation/file/file.go | 31 ++++++++++++---- attestation/file/file_test.go | 42 ++++++++++++++++++++-- attestation/link/link_test.go | 4 +-- attestation/material/material.go | 2 +- attestation/product/product.go | 17 +++++++-- cryptoutil/digestset.go | 52 ++++++++++++++++++++++----- cryptoutil/dirhash.go | 61 ++++++++++++++++++++++++++++++++ 8 files changed, 216 insertions(+), 32 deletions(-) create mode 100644 cryptoutil/dirhash.go diff --git a/attestation/context.go b/attestation/context.go index 969805a3..4f81086b 100644 --- a/attestation/context.go +++ b/attestation/context.go @@ -22,6 +22,7 @@ import ( "sync" "time" + "github.com/gobwas/glob" "github.com/in-toto/go-witness/cryptoutil" "github.com/in-toto/go-witness/log" ) @@ -83,6 +84,20 @@ func WithWorkingDir(workingDir string) AttestationContextOption { } } +func WithDirHashGlob(dirHashGlob []string) AttestationContextOption { + return func(ctx *AttestationContext) { + if len(dirHashGlob) > 0 { + ctx.dirHashGlob = dirHashGlob + + ctx.dirHashGlobCompiled = make([]glob.Glob, len(ctx.dirHashGlob)) + for i, dirHashGlobItem := range dirHashGlob { + dirHashGlobItemCompiled, _ := glob.Compile(dirHashGlobItem) + ctx.dirHashGlobCompiled[i] = dirHashGlobItemCompiled + } + } + } +} + type CompletedAttestor struct { Attestor Attestor StartTime time.Time @@ -91,15 +106,17 @@ type CompletedAttestor struct { } type AttestationContext struct { - ctx context.Context - attestors []Attestor - workingDir string - hashes []cryptoutil.DigestValue - completedAttestors []CompletedAttestor - products map[string]Product - materials map[string]cryptoutil.DigestSet - stepName string - mutex sync.RWMutex + ctx context.Context + attestors []Attestor + workingDir 
string + dirHashGlob []string + dirHashGlobCompiled []glob.Glob + hashes []cryptoutil.DigestValue + completedAttestors []CompletedAttestor + products map[string]Product + materials map[string]cryptoutil.DigestSet + stepName string + mutex sync.RWMutex } type Product struct { @@ -208,6 +225,10 @@ func (ctx *AttestationContext) runAttestor(attestor Attestor) { log.Infof("Finished %v attestor... (%vs)", attestor.Name(), time.Since(startTime).Seconds()) } +func (ctx *AttestationContext) DirHashGlob() []glob.Glob { + return ctx.dirHashGlobCompiled +} + func (ctx *AttestationContext) CompletedAttestors() []CompletedAttestor { ctx.mutex.RLock() out := make([]CompletedAttestor, len(ctx.completedAttestors)) diff --git a/attestation/file/file.go b/attestation/file/file.go index 14065d6f..e887afcc 100644 --- a/attestation/file/file.go +++ b/attestation/file/file.go @@ -19,6 +19,7 @@ import ( "os" "path/filepath" + "github.com/gobwas/glob" "github.com/in-toto/go-witness/cryptoutil" "github.com/in-toto/go-witness/log" ) @@ -26,22 +27,40 @@ import ( // recordArtifacts will walk basePath and record the digests of each file with each of the functions in hashes. // If file already exists in baseArtifacts and the two artifacts are equal the artifact will not be in the // returned map of artifacts. 
-func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []cryptoutil.DigestValue, visitedSymlinks map[string]struct{}, processWasTraced bool, openedFiles map[string]bool) (map[string]cryptoutil.DigestSet, error) { +func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []cryptoutil.DigestValue, visitedSymlinks map[string]struct{}, processWasTraced bool, openedFiles map[string]bool, dirHashGlob []glob.Glob) (map[string]cryptoutil.DigestSet, error) { artifacts := make(map[string]cryptoutil.DigestSet) err := filepath.Walk(basePath, func(path string, info fs.FileInfo, err error) error { if err != nil { return err } - if info.IsDir() { - return nil - } - relPath, err := filepath.Rel(basePath, path) if err != nil { return err } + if info.IsDir() { + dirHashMatch := false + for _, globItem := range dirHashGlob { + if !dirHashMatch && globItem.Match(relPath) { + dirHashMatch = true + } + } + + if dirHashMatch { + dir, err := cryptoutil.CalculateDigestSetFromDir(path, hashes) + + if err != nil { + return err + } + + artifacts[relPath+string(os.PathSeparator)] = dir + return filepath.SkipDir + } + + return nil + } + if info.Mode()&fs.ModeSymlink != 0 { // if this is a symlink, eval the true path and eval any artifacts in the symlink. 
we record every symlink we've visited to prevent infinite loops linkedPath, err := filepath.EvalSymlinks(path) @@ -57,7 +76,7 @@ func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.Digest } visitedSymlinks[linkedPath] = struct{}{} - symlinkedArtifacts, err := RecordArtifacts(linkedPath, baseArtifacts, hashes, visitedSymlinks, processWasTraced, openedFiles) + symlinkedArtifacts, err := RecordArtifacts(linkedPath, baseArtifacts, hashes, visitedSymlinks, processWasTraced, openedFiles, dirHashGlob) if err != nil { return err } diff --git a/attestation/file/file_test.go b/attestation/file/file_test.go index 5379a487..aa180886 100644 --- a/attestation/file/file_test.go +++ b/attestation/file/file_test.go @@ -20,6 +20,7 @@ import ( "path/filepath" "testing" + "github.com/gobwas/glob" "github.com/in-toto/go-witness/cryptoutil" "github.com/stretchr/testify/require" ) @@ -38,13 +39,15 @@ func TestBrokenSymlink(t *testing.T) { symTestDir := filepath.Join(dir, "symTestDir") require.NoError(t, os.Symlink(testDir, symTestDir)) - _, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}) + dirHash := make([]glob.Glob, 0) + + _, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}, dirHash) require.NoError(t, err) // remove the symlinks and make sure we don't get an error back require.NoError(t, os.RemoveAll(testDir)) require.NoError(t, os.RemoveAll(testFile)) - _, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}) + _, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}, dirHash) require.NoError(t, err) } @@ -57,7 +60,40 @@ func 
TestSymlinkCycle(t *testing.T) { symTestDir := filepath.Join(dir, "symTestDir") require.NoError(t, os.Symlink(dir, symTestDir)) + dirHash := make([]glob.Glob, 0) + // if a symlink cycle weren't properly handled this would be an infinite loop - _, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}) + _, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}, dirHash) + require.NoError(t, err) +} + +func TestDirHash(t *testing.T) { + dir := t.TempDir() + testFile := filepath.Join(dir, "testfile") + require.NoError(t, os.WriteFile(testFile, []byte("some dummy data"), os.ModePerm)) + testDir := filepath.Join(dir, "testdir") + require.NoError(t, os.Mkdir(testDir, os.ModePerm)) + testFile2 := filepath.Join(testDir, "testfile2") + require.NoError(t, os.WriteFile(testFile2, []byte("more dummy data"), os.ModePerm)) + + dirHashGlobs := make([]glob.Glob, 0) + + dirHash := "testdir" + dirHashGlobItem, _ := glob.Compile(dirHash) + dirHashGlobs = append(dirHashGlobs, dirHashGlobItem) + + artifacts, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}, dirHashGlobs) require.NoError(t, err) + + // Below command is example usage on the above created scenario for testdir. + // find . 
-type f | cut -c3- | LC_ALL=C sort | xargs -r sha256sum | sha256sum + dirHashSha256 := "ba9842eac063209c5f67c5a202b2b3a710f8f845f1d064f54af56763645b895b" + + require.Len(t, artifacts, 2) + + dirDigestSet := artifacts["testdir/"] + dirDigestSetMap, err := dirDigestSet.ToNameMap() + require.NoError(t, err) + + require.Equal(t, dirDigestSetMap["dirHash"], dirHashSha256) } diff --git a/attestation/link/link_test.go b/attestation/link/link_test.go index 6da82646..b863be12 100644 --- a/attestation/link/link_test.go +++ b/attestation/link/link_test.go @@ -99,8 +99,8 @@ func TestAttest(t *testing.T) { // Setup Materials m := attestors.NewTestMaterialAttestor() materials := make(map[string]cryptoutil.DigestSet) - materials["test2"] = cryptoutil.DigestSet{{Hash: crypto.SHA256, GitOID: false}: "a53d0741798b287c6dd7afa64aee473f305e65d3f49463bb9d7408ec3b12bf5f"} - materials["test1"] = cryptoutil.DigestSet{{Hash: crypto.SHA256, GitOID: false}: "a53d0741798b287c6dd7afa64aee473f305e65d3f49463bb9d7408ec3b12bf5f"} + materials["test2"] = cryptoutil.DigestSet{{Hash: crypto.SHA256, GitOID: false, DirHash: false}: "a53d0741798b287c6dd7afa64aee473f305e65d3f49463bb9d7408ec3b12bf5f"} + materials["test1"] = cryptoutil.DigestSet{{Hash: crypto.SHA256, GitOID: false, DirHash: false}: "a53d0741798b287c6dd7afa64aee473f305e65d3f49463bb9d7408ec3b12bf5f"} m.SetMaterials(materials) // Setup CommandRun diff --git a/attestation/material/material.go b/attestation/material/material.go index 6b99a4e3..45ae6e1f 100644 --- a/attestation/material/material.go +++ b/attestation/material/material.go @@ -90,7 +90,7 @@ func (a *Attestor) Schema() *jsonschema.Schema { } func (a *Attestor) Attest(ctx *attestation.AttestationContext) error { - materials, err := file.RecordArtifacts(ctx.WorkingDir(), nil, ctx.Hashes(), map[string]struct{}{}, false, map[string]bool{}) + materials, err := file.RecordArtifacts(ctx.WorkingDir(), nil, ctx.Hashes(), map[string]struct{}{}, false, map[string]bool{}, ctx.DirHashGlob()) if 
err != nil { return err } diff --git a/attestation/product/product.go b/attestation/product/product.go index 8c9d6c34..ada6d77c 100644 --- a/attestation/product/product.go +++ b/attestation/product/product.go @@ -18,6 +18,7 @@ import ( "bytes" "encoding/json" "fmt" + "os" "path/filepath" "github.com/gabriel-vasile/mimetype" @@ -121,11 +122,19 @@ func fromDigestMap(workingDir string, digestMap map[string]cryptoutil.DigestSet) products := make(map[string]attestation.Product) for fileName, digestSet := range digestMap { filePath := filepath.Join(workingDir, fileName) + mimeType, err := getFileContentType(filePath) if err != nil { mimeType = "unknown" } + if mimeType == "application/octet-stream" { + fileInfo, err := os.Stat(filePath) + if err == nil && fileInfo.IsDir() { + mimeType = "text/directory" + } + } + products[fileName] = attestation.Product{ MimeType: mimeType, Digest: digestSet, @@ -199,7 +208,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error { } } - products, err := file.RecordArtifacts(ctx.WorkingDir(), a.baseArtifacts, ctx.Hashes(), map[string]struct{}{}, processWasTraced, openedFileSet) + products, err := file.RecordArtifacts(ctx.WorkingDir(), a.baseArtifacts, ctx.Hashes(), map[string]struct{}{}, processWasTraced, openedFileSet, ctx.DirHashGlob()) if err != nil { return err } @@ -237,7 +246,11 @@ func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet { continue } - subjects[fmt.Sprintf("file:%v", productName)] = product.Digest + subjectType := "file" + if product.MimeType == "text/directory" { + subjectType = "dir" + } + subjects[fmt.Sprintf("%v:%v", subjectType, productName)] = product.Digest } return subjects diff --git a/cryptoutil/digestset.go b/cryptoutil/digestset.go index c75d57c0..3b91a8a0 100644 --- a/cryptoutil/digestset.go +++ b/cryptoutil/digestset.go @@ -22,44 +22,64 @@ import ( "hash" "io" "os" + + "golang.org/x/mod/sumdb/dirhash" ) var ( hashNames = map[DigestValue]string{ { - Hash: crypto.SHA256, - GitOID: 
false, + Hash: crypto.SHA256, + GitOID: false, + DirHash: false, }: "sha256", { - Hash: crypto.SHA1, - GitOID: false, + Hash: crypto.SHA1, + GitOID: false, + DirHash: false, }: "sha1", { - Hash: crypto.SHA256, - GitOID: true, + Hash: crypto.SHA256, + GitOID: true, + DirHash: false, }: "gitoid:sha256", { - Hash: crypto.SHA1, - GitOID: true, + Hash: crypto.SHA1, + GitOID: true, + DirHash: false, }: "gitoid:sha1", + { + Hash: crypto.SHA256, + GitOID: false, + DirHash: true, + }: "dirHash", } hashesByName = map[string]DigestValue{ "sha256": { crypto.SHA256, false, + false, }, "sha1": { crypto.SHA1, false, + false, }, "gitoid:sha256": { crypto.SHA256, true, + false, }, "gitoid:sha1": { crypto.SHA1, true, + false, + }, + "dirHash": { + crypto.SHA256, + false, + true, }, } ) @@ -72,7 +92,8 @@ func (e ErrUnsupportedHash) Error() string { type DigestValue struct { crypto.Hash - GitOID bool + GitOID bool + DirHash bool } func (dv DigestValue) New() hash.Hash { @@ -203,6 +224,19 @@ func CalculateDigestSetFromFile(path string, hashes []DigestValue) (DigestSet, e return CalculateDigestSet(file, hashes) } +func CalculateDigestSetFromDir(dir string, hashes []DigestValue) (DigestSet, error) { + + dirHash, err := dirhash.HashDir(dir, "", DirhHashSha256) + if err != nil { + return nil, err + } + + digestSetByName := make(map[string]string) + digestSetByName["dirHash"] = dirHash + + return NewDigestSet(digestSetByName) +} + func (ds DigestSet) MarshalJSON() ([]byte, error) { nameMap, err := ds.ToNameMap() if err != nil { diff --git a/cryptoutil/dirhash.go b/cryptoutil/dirhash.go new file mode 100644 index 00000000..044a2b15 --- /dev/null +++ b/cryptoutil/dirhash.go @@ -0,0 +1,61 @@ +// Copyright 2022 The Witness Contributors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// DirhHashSha256 returns the hex-encoded SHA-256 digest of a summary of the
// named files, prepared as if by the Unix command:
//
//	sha256sum $(find . -type f | sort) | sha256sum
//
// The summary contains a single line for each entry of files, ordered by
// sort.Strings applied to the names. Each line consists of the hexadecimal
// SHA-256 hash of the file content, two spaces (U+0020), the file name, and a
// newline (U+000A).
//
// The signature is the one dirhash.HashDir accepts as its hash argument. The
// result is plain hexadecimal; no "h1:" prefix is added.
//
// open is called once per name to obtain that file's content, and every
// reader it returns is closed before the next file is processed. The files
// slice itself is left unmodified. Names containing a newline (U+000A) are
// rejected with an error. Errors from open or from reading the content are
// wrapped with the offending name and remain matchable with errors.Is.
func DirhHashSha256(files []string, open func(string) (io.ReadCloser, error)) (string, error) {
	// Sort a private copy so the caller's slice keeps its original order.
	sorted := append([]string(nil), files...)
	sort.Strings(sorted)

	summary := sha256.New()
	for _, name := range sorted {
		// A newline inside a name would forge an extra summary line.
		if strings.Contains(name, "\n") {
			return "", errors.New("dirhash: filenames with newlines are not supported")
		}

		r, err := open(name)
		if err != nil {
			return "", fmt.Errorf("dirhash: opening %q: %w", name, err)
		}

		content := sha256.New()
		_, err = io.Copy(content, r)
		// Close before inspecting the copy error so the reader is released on
		// every path. The close error is ignored because the reader was only
		// read from, so there is nothing left to flush.
		r.Close()
		if err != nil {
			return "", fmt.Errorf("dirhash: reading %q: %w", name, err)
		}

		// Two spaces between digest and name, as documented above and as
		// emitted by sha256sum.
		fmt.Fprintf(summary, "%x  %s\n", content.Sum(nil), name)
	}

	return hex.EncodeToString(summary.Sum(nil)), nil
}