Skip to content

Commit

Permalink
feat: Adding functionality for dirhash.
Browse files Browse the repository at this point in the history
Signed-off-by: Matthias Glastra <[email protected]>
  • Loading branch information
matglas committed May 20, 2024
1 parent 5e04111 commit 3723727
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 22 deletions.
37 changes: 29 additions & 8 deletions attestation/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"os"
"time"

"github.com/gobwas/glob"
"github.com/in-toto/go-witness/cryptoutil"
"github.com/in-toto/go-witness/log"
)
Expand Down Expand Up @@ -82,6 +83,20 @@ func WithWorkingDir(workingDir string) AttestationContextOption {
}
}

// WithDirHashGlob returns an option that configures the glob patterns used to
// select directories that should be hashed as a whole.
//
// The raw patterns are stored on the context as given. Each pattern is compiled
// with glob.Compile; patterns that fail to compile are skipped so that the
// compiled list never holds an unusable entry that would fail later when it is
// matched against a path. An empty dirHashGlob leaves the context untouched.
//
// NOTE(review): the option signature cannot return an error, so an invalid
// pattern is dropped here; consider reporting it through the package logger.
func WithDirHashGlob(dirHashGlob []string) AttestationContextOption {
	return func(ctx *AttestationContext) {
		if len(dirHashGlob) == 0 {
			return
		}

		ctx.dirHashGlob = dirHashGlob

		compiled := make([]glob.Glob, 0, len(dirHashGlob))
		for _, pattern := range dirHashGlob {
			g, err := glob.Compile(pattern)
			if err != nil {
				// Skip patterns that do not compile instead of storing a nil glob.
				continue
			}
			compiled = append(compiled, g)
		}
		ctx.dirHashGlobCompiled = compiled
	}
}

type CompletedAttestor struct {
Attestor Attestor
StartTime time.Time
Expand All @@ -90,14 +105,16 @@ type CompletedAttestor struct {
}

type AttestationContext struct {
ctx context.Context
attestors []Attestor
workingDir string
hashes []cryptoutil.DigestValue
completedAttestors []CompletedAttestor
products map[string]Product
materials map[string]cryptoutil.DigestSet
stepName string
ctx context.Context
attestors []Attestor
workingDir string
dirHashGlob []string
dirHashGlobCompiled []glob.Glob
hashes []cryptoutil.DigestValue
completedAttestors []CompletedAttestor
products map[string]Product
materials map[string]cryptoutil.DigestSet
stepName string
}

type Product struct {
Expand Down Expand Up @@ -185,6 +202,10 @@ func (ctx *AttestationContext) runAttestor(attestor Attestor) {
}
}

// DirHashGlob returns the compiled directory-hash glob patterns configured on
// the context.
//
// A copy of the slice is returned, in the same way CompletedAttestors does, so
// callers cannot modify the context's own list.
func (ctx *AttestationContext) DirHashGlob() []glob.Glob {
	out := make([]glob.Glob, len(ctx.dirHashGlobCompiled))
	copy(out, ctx.dirHashGlobCompiled)
	return out
}

func (ctx *AttestationContext) CompletedAttestors() []CompletedAttestor {
out := make([]CompletedAttestor, len(ctx.completedAttestors))
copy(out, ctx.completedAttestors)
Expand Down
31 changes: 25 additions & 6 deletions attestation/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,48 @@ import (
"os"
"path/filepath"

"github.com/gobwas/glob"
"github.com/in-toto/go-witness/cryptoutil"
"github.com/in-toto/go-witness/log"
)

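// RecordArtifacts walks basePath and records the digests of each file with each of the functions in hashes.
// A directory whose path relative to basePath matches one of the dirHashGlob patterns is recorded as a single
// artifact (keyed by its relative path plus a trailing path separator) and its contents are not walked.
// If a file already exists in baseArtifacts and the two artifacts are equal, the artifact will not be in the
// returned map of artifacts.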
func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []cryptoutil.DigestValue, visitedSymlinks map[string]struct{}) (map[string]cryptoutil.DigestSet, error) {
func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []cryptoutil.DigestValue, visitedSymlinks map[string]struct{}, dirHashGlob []glob.Glob) (map[string]cryptoutil.DigestSet, error) {
artifacts := make(map[string]cryptoutil.DigestSet)
err := filepath.Walk(basePath, func(path string, info fs.FileInfo, err error) error {
if err != nil {
return err
}

if info.IsDir() {
return nil
}

relPath, err := filepath.Rel(basePath, path)
if err != nil {
return err
}

if info.IsDir() {
	// A directory is recorded as one artifact only when its relative path
	// matches one of the configured globs; otherwise the walk descends into
	// it and its files are handled individually.
	dirHashMatch := false
	for _, globItem := range dirHashGlob {
		// Guard against nil entries: the caller may pass a glob whose
		// compilation failed and was not checked.
		if globItem != nil && globItem.Match(relPath) {
			dirHashMatch = true
			break
		}
	}

	if !dirHashMatch {
		return nil
	}

	// Check the error returned by the directory hash itself rather than the
	// earlier, already-handled err value.
	dir, err := cryptoutil.CalculateDigestSetFromDir(path, hashes)
	if err != nil {
		return err
	}

	// The trailing separator marks the entry as a directory; SkipDir stops
	// the walk from visiting the files that were just hashed.
	artifacts[relPath+string(os.PathSeparator)] = dir
	return filepath.SkipDir
}

if info.Mode()&fs.ModeSymlink != 0 {
// if this is a symlink, eval the true path and eval any artifacts in the symlink. we record every symlink we've visited to prevent infinite loops
linkedPath, err := filepath.EvalSymlinks(path)
Expand All @@ -57,7 +76,7 @@ func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.Digest
}

visitedSymlinks[linkedPath] = struct{}{}
symlinkedArtifacts, err := RecordArtifacts(linkedPath, baseArtifacts, hashes, visitedSymlinks)
symlinkedArtifacts, err := RecordArtifacts(linkedPath, baseArtifacts, hashes, visitedSymlinks, dirHashGlob)
if err != nil {
return err
}
Expand Down
42 changes: 39 additions & 3 deletions attestation/file/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"path/filepath"
"testing"

"github.com/gobwas/glob"
"github.com/in-toto/go-witness/cryptoutil"
"github.com/stretchr/testify/require"
)
Expand All @@ -38,13 +39,15 @@ func TestBrokenSymlink(t *testing.T) {
symTestDir := filepath.Join(dir, "symTestDir")
require.NoError(t, os.Symlink(testDir, symTestDir))

_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{})
dirHash := make([]glob.Glob, 0)

_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, dirHash)
require.NoError(t, err)

// remove the symlinks and make sure we don't get an error back
require.NoError(t, os.RemoveAll(testDir))
require.NoError(t, os.RemoveAll(testFile))
_, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{})
_, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, dirHash)
require.NoError(t, err)
}

Expand All @@ -57,7 +60,40 @@ func TestSymlinkCycle(t *testing.T) {
symTestDir := filepath.Join(dir, "symTestDir")
require.NoError(t, os.Symlink(dir, symTestDir))

dirHash := make([]glob.Glob, 0)

// if a symlink cycle weren't properly handled this would be an infinite loop
_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{})
_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, dirHash)
require.NoError(t, err)
}

// TestDirHash checks that a directory matching a dir-hash glob is recorded as a
// single artifact carrying the expected directory digest, alongside the regular
// file that sits next to it.
func TestDirHash(t *testing.T) {
	dir := t.TempDir()
	testFile := filepath.Join(dir, "testfile")
	require.NoError(t, os.WriteFile(testFile, []byte("some dummy data"), os.ModePerm))
	testDir := filepath.Join(dir, "testdir")
	require.NoError(t, os.Mkdir(testDir, os.ModePerm))
	testFile2 := filepath.Join(testDir, "testfile2")
	require.NoError(t, os.WriteFile(testFile2, []byte("more dummy data"), os.ModePerm))

	dirHashGlobItem, err := glob.Compile("testdir")
	require.NoError(t, err)
	dirHashGlobs := []glob.Glob{dirHashGlobItem}

	artifacts, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, dirHashGlobs)
	require.NoError(t, err)

	// Below command is example usage on the above created scenario for testdir.
	// find . -type f | cut -c3- | LC_ALL=C sort | xargs -r sha256sum | sha256sum
	dirHashSha256 := "ba9842eac063209c5f67c5a202b2b3a710f8f845f1d064f54af56763645b895b"

	// One entry for testfile and one for the hashed directory.
	require.Len(t, artifacts, 2)

	// RecordArtifacts keys directory artifacts by relative path plus the
	// platform path separator, so build the key the same way.
	dirKey := "testdir" + string(os.PathSeparator)
	require.Contains(t, artifacts, dirKey)

	dirDigestSet := artifacts[dirKey]
	dirDigestSetMap, err := dirDigestSet.ToNameMap()
	require.NoError(t, err)

	require.Equal(t, dirHashSha256, dirDigestSetMap["sha256"])
}
2 changes: 1 addition & 1 deletion attestation/material/material.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func (a *Attestor) Schema() *jsonschema.Schema {
}

func (a *Attestor) Attest(ctx *attestation.AttestationContext) error {
materials, err := file.RecordArtifacts(ctx.WorkingDir(), nil, ctx.Hashes(), map[string]struct{}{})
materials, err := file.RecordArtifacts(ctx.WorkingDir(), nil, ctx.Hashes(), map[string]struct{}{}, ctx.DirHashGlob())
if err != nil {
return err
}
Expand Down
24 changes: 20 additions & 4 deletions attestation/product/product.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,23 @@ func fromDigestMap(digestMap map[string]cryptoutil.DigestSet) map[string]attesta
products := make(map[string]attestation.Product)
for fileName, digestSet := range digestMap {
mimeType := "unknown"

f, err := os.OpenFile(fileName, os.O_RDONLY, 0666)
if err == nil {
mimeType, err = getFileContentType(f)
// This returns an *os.FileInfo type
fileInfo, err := f.Stat()
if err != nil {
mimeType = "unknown"
// error handling
}

// IsDir is short for fileInfo.Mode().IsDir()
if fileInfo.IsDir() {
mimeType = "text/directory"
} else {
mimeType, err = getFileContentType(f)
if err != nil {
mimeType = "unknown"
}
}
f.Close()
}
Expand Down Expand Up @@ -187,7 +199,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error {
a.compiledExcludeGlob = compiledExcludeGlob

a.baseArtifacts = ctx.Materials()
products, err := file.RecordArtifacts(ctx.WorkingDir(), a.baseArtifacts, ctx.Hashes(), map[string]struct{}{})
products, err := file.RecordArtifacts(ctx.WorkingDir(), a.baseArtifacts, ctx.Hashes(), map[string]struct{}{}, ctx.DirHashGlob())
if err != nil {
return err
}
Expand Down Expand Up @@ -225,7 +237,11 @@ func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
continue
}

subjects[fmt.Sprintf("file:%v", productName)] = product.Digest
subjectType := "file"
if product.MimeType == "text/directory" {
subjectType = "dir"
}
subjects[fmt.Sprintf("%v:%v", subjectType, productName)] = product.Digest
}

return subjects
Expand Down
23 changes: 23 additions & 0 deletions cryptoutil/digestset.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"hash"
"io"
"os"

"golang.org/x/mod/sumdb/dirhash"
)

var (
Expand All @@ -42,6 +44,10 @@ var (
Hash: crypto.SHA1,
GitOID: true,
}: "gitoid:sha1",
{
Hash: crypto.SHA256,
GitOID: false,
}: "dirHash",
}

hashesByName = map[string]DigestValue{
Expand All @@ -61,6 +67,10 @@ var (
crypto.SHA1,
true,
},
"dirHash": {
crypto.SHA256,
false,
},
}
)

Expand Down Expand Up @@ -203,6 +213,19 @@ func CalculateDigestSetFromFile(path string, hashes []DigestValue) (DigestSet, e
return CalculateDigestSet(file, hashes)
}

// CalculateDigestSetFromDir computes a digest over the directory tree rooted at
// dir and returns it as a DigestSet with a single entry named "dirHash".
//
// The digest comes from dirhash.HashDir, called with an empty prefix and
// DirhHashSha256 as the hash function. The hashes argument is not consulted by
// this function. Any error from hashing the directory is returned unchanged
// with a nil DigestSet.
func CalculateDigestSetFromDir(dir string, hashes []DigestValue) (DigestSet, error) {
	sum, err := dirhash.HashDir(dir, "", DirhHashSha256)
	if err != nil {
		return nil, err
	}

	return NewDigestSet(map[string]string{"dirHash": sum})
}

func (ds DigestSet) MarshalJSON() ([]byte, error) {
nameMap, err := ds.ToNameMap()
if err != nil {
Expand Down
61 changes: 61 additions & 0 deletions cryptoutil/dirhash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2022 The Witness Contributors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cryptoutil

import (
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"io"
"sort"
"strings"
)

// DirhHashSha256 is a directory hash function using SHA-256. It has the
// signature expected for the hash argument of dirhash.HashDir.
//
// DirhHashSha256 returns a SHA-256 hash of a summary
// prepared as if by the Unix command:
//
//	sha256sum $(find . -type f | sort) | sha256sum
//
// More precisely, the hashed summary contains a single line for each file in the list,
// ordered by sort.Strings applied to the file names, where each line consists of
// the hexadecimal SHA-256 hash of the file content,
// two spaces (U+0020), the file name, and a newline (U+000A).
//
// The files slice is copied before sorting, so the caller's slice is left in
// its original order. The open callback is invoked once per file name to obtain
// its content; every reader it returns is closed before the next file is read.
//
// The result is the lowercase hexadecimal encoding of the summary hash, with no
// scheme prefix. An error is returned if a file name contains a newline
// (U+000A), if open fails, or if reading a file's content fails.
func DirhHashSha256(files []string, open func(string) (io.ReadCloser, error)) (string, error) {
	h := sha256.New()
	files = append([]string(nil), files...)
	sort.Strings(files)
	for _, file := range files {
		if strings.Contains(file, "\n") {
			return "", errors.New("dirhash: filenames with newlines are not supported")
		}
		r, err := open(file)
		if err != nil {
			return "", err
		}
		hf := sha256.New()
		_, err = io.Copy(hf, r)
		r.Close()
		if err != nil {
			return "", err
		}
		// Two spaces between digest and name, matching sha256sum output and
		// the format documented above.
		fmt.Fprintf(h, "%x  %s\n", hf.Sum(nil), file)
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}

0 comments on commit 3723727

Please sign in to comment.