From c80a8a705528bb65b890d8e79b17d343d800a3c8 Mon Sep 17 00:00:00 2001 From: Mikhail Swift Date: Fri, 27 Oct 2023 02:44:31 -0400 Subject: [PATCH] refactor: move gitoid code to cryptoutil, use digestvalue everywhere When the functionality to calculate gitoids was added, there was a bit of tech debt incurred since they didn't implement hash.Hash. This remedies this with an admittedly hacky implementation of hash.Hash that wraps the gitoid code. This also standardizes our cryptoutil functions around the DigestValue struct that was added around this time to differentiate between gitoids and regular hash functions. --- attestation/aws-iid/aws-iid.go | 4 +- attestation/context.go | 10 ++-- attestation/file/file.go | 29 +--------- attestation/file/file_test.go | 6 +- attestation/gcp-iit/gcp-iit.go | 2 +- attestation/git/git.go | 2 +- attestation/github/github.go | 2 +- attestation/gitlab/gitlab.go | 2 +- attestation/maven/maven.go | 2 +- attestation/oci/oci.go | 2 +- attestation/oci/oci_test.go | 2 +- attestation/policyverify/policyverify.go | 2 +- attestation/product/product_test.go | 4 +- cryptoutil/digestset.go | 35 ++++++++---- cryptoutil/gitoid.go | 71 ++++++++++++++++++++++++ 15 files changed, 116 insertions(+), 59 deletions(-) create mode 100644 cryptoutil/gitoid.go diff --git a/attestation/aws-iid/aws-iid.go b/attestation/aws-iid/aws-iid.go index fd9ad36b..ddceee38 100644 --- a/attestation/aws-iid/aws-iid.go +++ b/attestation/aws-iid/aws-iid.go @@ -80,7 +80,7 @@ func init() { type Attestor struct { ec2metadata.EC2InstanceIdentityDocument - hashes []crypto.Hash + hashes []cryptoutil.DigestValue session session.Session conf *aws.Config RawIID string `json:"rawiid"` @@ -195,7 +195,7 @@ func (a *Attestor) Verify() error { } func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet { - hashes := []crypto.Hash{crypto.SHA256} + hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}} subjects := make(map[string]cryptoutil.DigestSet) if ds, err := 
cryptoutil.CalculateDigestSetFromBytes([]byte(a.EC2InstanceIdentityDocument.InstanceID), hashes); err == nil { subjects[fmt.Sprintf("instanceid:%s", a.EC2InstanceIdentityDocument.InstanceID)] = ds diff --git a/attestation/context.go b/attestation/context.go index 80bab148..54ccbdf1 100644 --- a/attestation/context.go +++ b/attestation/context.go @@ -56,7 +56,7 @@ func WithContext(ctx context.Context) AttestationContextOption { } } -func WithHashes(hashes []crypto.Hash) AttestationContextOption { +func WithHashes(hashes []cryptoutil.DigestValue) AttestationContextOption { return func(ctx *AttestationContext) { if len(hashes) > 0 { ctx.hashes = hashes @@ -83,7 +83,7 @@ type AttestationContext struct { ctx context.Context attestors []Attestor workingDir string - hashes []crypto.Hash + hashes []cryptoutil.DigestValue completedAttestors []CompletedAttestor products map[string]Product materials map[string]cryptoutil.DigestSet @@ -104,7 +104,7 @@ func NewContext(attestors []Attestor, opts ...AttestationContextOption) (*Attest ctx: context.Background(), attestors: attestors, workingDir: wd, - hashes: []crypto.Hash{crypto.SHA256}, + hashes: []cryptoutil.DigestValue{{Hash: crypto.SHA256}, {Hash: crypto.SHA256, GitOID: true}, {Hash: crypto.SHA1, GitOID: true}}, materials: make(map[string]cryptoutil.DigestSet), products: make(map[string]Product), } @@ -222,8 +222,8 @@ func (ctx *AttestationContext) WorkingDir() string { return ctx.workingDir } -func (ctx *AttestationContext) Hashes() []crypto.Hash { - hashes := make([]crypto.Hash, len(ctx.hashes)) +func (ctx *AttestationContext) Hashes() []cryptoutil.DigestValue { + hashes := make([]cryptoutil.DigestValue, len(ctx.hashes)) copy(hashes, ctx.hashes) return hashes } diff --git a/attestation/file/file.go b/attestation/file/file.go index 4b648c7f..fe6f4b7d 100644 --- a/attestation/file/file.go +++ b/attestation/file/file.go @@ -15,12 +15,10 @@ package file import ( - "crypto" "io/fs" "os" "path/filepath" - 
"github.com/edwarnicke/gitoid" "github.com/testifysec/go-witness/cryptoutil" "github.com/testifysec/go-witness/log" ) @@ -28,7 +26,7 @@ import ( // recordArtifacts will walk basePath and record the digests of each file with each of the functions in hashes. // If file already exists in baseArtifacts and the two artifacts are equal the artifact will not be in the // returned map of artifacts. -func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []crypto.Hash, visitedSymlinks map[string]struct{}) (map[string]cryptoutil.DigestSet, error) { +func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []cryptoutil.DigestValue, visitedSymlinks map[string]struct{}) (map[string]cryptoutil.DigestSet, error) { artifacts := make(map[string]cryptoutil.DigestSet) err := filepath.Walk(basePath, func(path string, info fs.FileInfo, err error) error { if err != nil { @@ -80,31 +78,6 @@ func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.Digest return err } - fileReader, err := os.Open(path) - if err != nil { - return err - } - - goidSha1, err := gitoid.New(fileReader) - if err != nil { - return err - } - - goidSha256, err := gitoid.New(fileReader, gitoid.WithSha256()) - if err != nil { - return err - } - - artifact[cryptoutil.DigestValue{ - Hash: crypto.SHA1, - GitOID: true, - }] = goidSha1.URI() - - artifact[cryptoutil.DigestValue{ - Hash: crypto.SHA256, - GitOID: true, - }] = goidSha256.URI() - if shouldRecord(relPath, artifact, baseArtifacts) { artifacts[relPath] = artifact } diff --git a/attestation/file/file_test.go b/attestation/file/file_test.go index a4269bf1..1e901ca1 100644 --- a/attestation/file/file_test.go +++ b/attestation/file/file_test.go @@ -38,13 +38,13 @@ func TestBrokenSymlink(t *testing.T) { symTestDir := filepath.Join(dir, "symTestDir") require.NoError(t, os.Symlink(testDir, symTestDir)) - _, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, 
[]crypto.Hash{crypto.SHA256}, map[string]struct{}{}) + _, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}) require.NoError(t, err) // remove the symlinks and make sure we don't get an error back require.NoError(t, os.RemoveAll(testDir)) require.NoError(t, os.RemoveAll(testFile)) - _, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []crypto.Hash{crypto.SHA256}, map[string]struct{}{}) + _, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}) require.NoError(t, err) } @@ -58,6 +58,6 @@ func TestSymlinkCycle(t *testing.T) { require.NoError(t, os.Symlink(dir, symTestDir)) // if a symlink cycle weren't properly handled this would be an infinite loop - _, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []crypto.Hash{crypto.SHA256}, map[string]struct{}{}) + _, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}) require.NoError(t, err) } diff --git a/attestation/gcp-iit/gcp-iit.go b/attestation/gcp-iit/gcp-iit.go index d970acc8..a57f71ef 100644 --- a/attestation/gcp-iit/gcp-iit.go +++ b/attestation/gcp-iit/gcp-iit.go @@ -175,7 +175,7 @@ func (a *Attestor) getInstanceData() { func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet { subjects := make(map[string]cryptoutil.DigestSet) - hashes := []crypto.Hash{crypto.SHA256} + hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}} if ds, err := cryptoutil.CalculateDigestSetFromBytes([]byte(a.InstanceID), hashes); err == nil { subjects[fmt.Sprintf("instanceid:%v", a.InstanceID)] = ds } else { diff --git a/attestation/git/git.go b/attestation/git/git.go index 2978b4d7..d7405673 100644 --- a/attestation/git/git.go +++ b/attestation/git/git.go @@ -220,7 +220,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error { func (a 
*Attestor) Subjects() map[string]cryptoutil.DigestSet { subjects := make(map[string]cryptoutil.DigestSet) - hashes := []crypto.Hash{crypto.SHA256} + hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}} subjectName := fmt.Sprintf("commithash:%v", a.CommitHash) subjects[subjectName] = cryptoutil.DigestSet{ diff --git a/attestation/github/github.go b/attestation/github/github.go index ef394210..42b13c20 100644 --- a/attestation/github/github.go +++ b/attestation/github/github.go @@ -136,7 +136,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error { func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet { subjects := make(map[string]cryptoutil.DigestSet) - hashes := []crypto.Hash{crypto.SHA256} + hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}} if pipelineSubj, err := cryptoutil.CalculateDigestSetFromBytes([]byte(a.PipelineUrl), hashes); err == nil { subjects[fmt.Sprintf("pipelineurl:%v", a.PipelineUrl)] = pipelineSubj } else { diff --git a/attestation/gitlab/gitlab.go b/attestation/gitlab/gitlab.go index ba0b9517..da6d1045 100644 --- a/attestation/gitlab/gitlab.go +++ b/attestation/gitlab/gitlab.go @@ -118,7 +118,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error { func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet { subjects := make(map[string]cryptoutil.DigestSet) - hashes := []crypto.Hash{crypto.SHA256} + hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}} if ds, err := cryptoutil.CalculateDigestSetFromBytes([]byte(a.PipelineUrl), hashes); err == nil { subjects[fmt.Sprintf("pipelineurl:%v", a.PipelineUrl)] = ds } else { diff --git a/attestation/maven/maven.go b/attestation/maven/maven.go index 801852a8..794d8282 100644 --- a/attestation/maven/maven.go +++ b/attestation/maven/maven.go @@ -116,7 +116,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error { func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet { subjects := make(map[string]cryptoutil.DigestSet) 
- hashes := []crypto.Hash{crypto.SHA256} + hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}} projectSubject := fmt.Sprintf("project:%v/%v@%v", a.GroupId, a.ArtifactId, a.Version) if ds, err := cryptoutil.CalculateDigestSetFromBytes([]byte(projectSubject), hashes); err == nil { subjects[projectSubject] = ds diff --git a/attestation/oci/oci.go b/attestation/oci/oci.go index de8f3386..9bc67c93 100644 --- a/attestation/oci/oci.go +++ b/attestation/oci/oci.go @@ -231,7 +231,7 @@ func (a *Attestor) parseMaifest(ctx *attestation.AttestationContext) error { } func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet { - hashes := []crypto.Hash{crypto.SHA256} + hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}} subj := make(map[string]cryptoutil.DigestSet) subj[fmt.Sprintf("manifestdigest:%s", a.ManifestDigest[cryptoutil.DigestValue{Hash: crypto.SHA256}])] = a.ManifestDigest subj[fmt.Sprintf("tardigest:%s", a.TarDigest[cryptoutil.DigestValue{Hash: crypto.SHA256}])] = a.TarDigest diff --git a/attestation/oci/oci_test.go b/attestation/oci/oci_test.go index 4b75a125..cf3bc5bd 100644 --- a/attestation/oci/oci_test.go +++ b/attestation/oci/oci_test.go @@ -92,7 +92,7 @@ func TestAttestor_Attest(t *testing.T) { t.Fatal(err) } - hashes := []crypto.Hash{crypto.SHA256} + hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}} tarDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte(decoded), hashes) if err != nil { diff --git a/attestation/policyverify/policyverify.go b/attestation/policyverify/policyverify.go index c956512c..d936c4d7 100644 --- a/attestation/policyverify/policyverify.go +++ b/attestation/policyverify/policyverify.go @@ -168,7 +168,7 @@ func (vo *Attestor) Attest(ctx *attestation.AttestationContext) error { } func calculateDigest(b []byte) (cryptoutil.DigestSet, error) { - return cryptoutil.CalculateDigestSetFromBytes(b, []crypto.Hash{crypto.SHA256}) + return cryptoutil.CalculateDigestSetFromBytes(b, []cryptoutil.DigestValue{{Hash: 
crypto.SHA256}}) } func verificationSummaryFromResults(policyEnvelope dsse.Envelope, policyResult policy.PolicyResult, accepted bool) (slsa.VerificationSummary, error) { diff --git a/attestation/product/product_test.go b/attestation/product/product_test.go index ece998d9..5560276c 100644 --- a/attestation/product/product_test.go +++ b/attestation/product/product_test.go @@ -30,7 +30,7 @@ import ( ) func TestFromDigestMap(t *testing.T) { - testDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte("test"), []crypto.Hash{crypto.SHA256}) + testDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte("test"), []cryptoutil.DigestValue{{Hash: crypto.SHA256}}) assert.NoError(t, err) testDigestSet := make(map[string]cryptoutil.DigestSet) testDigestSet["test"] = testDigest @@ -56,7 +56,7 @@ func TestAttestorRunType(t *testing.T) { func TestAttestorAttest(t *testing.T) { a := New() - testDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte("test"), []crypto.Hash{crypto.SHA256}) + testDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte("test"), []cryptoutil.DigestValue{{Hash: crypto.SHA256}}) if err != nil { t.Errorf("Failed to calculate digest set from bytes: %v", err) } diff --git a/cryptoutil/digestset.go b/cryptoutil/digestset.go index d97ef70a..231289a6 100644 --- a/cryptoutil/digestset.go +++ b/cryptoutil/digestset.go @@ -75,6 +75,14 @@ type DigestValue struct { GitOID bool } +func (dv DigestValue) New() hash.Hash { + if dv.GitOID { + return &gitoidHasher{hash: dv.Hash, buf: &bytes.Buffer{}} + } + + return dv.Hash.New() +} + type DigestSet map[DigestValue]string func HashToString(h crypto.Hash) (string, error) { @@ -142,13 +150,13 @@ func NewDigestSet(digestsByName map[string]string) (DigestSet, error) { return ds, nil } -func CalculateDigestSet(r io.Reader, hashes []crypto.Hash) (DigestSet, error) { +func CalculateDigestSet(r io.Reader, digestValues []DigestValue) (DigestSet, error) { digestSet := make(DigestSet) writers := []io.Writer{} - 
hashfuncs := map[crypto.Hash]hash.Hash{} - for _, hash := range hashes { - hashfunc := hash.New() - hashfuncs[hash] = hashfunc + hashfuncs := map[DigestValue]hash.Hash{} + for _, digestValue := range digestValues { + hashfunc := digestValue.New() + hashfuncs[digestValue] = hashfunc writers = append(writers, hashfunc) } @@ -157,21 +165,26 @@ func CalculateDigestSet(r io.Reader, hashes []crypto.Hash) (DigestSet, error) { return digestSet, err } - for hash, hashfunc := range hashfuncs { - digestValue := DigestValue{ - Hash: hash, - GitOID: false, + for digestValue, hashfunc := range hashfuncs { + // gitoids are somewhat special... we're using a custom implementation of hash.Hash + // to wrap the gitoid library. Sum will return a gitoid URI, so we don't want to hex + // encode it. Sum appends to its argument, so pass nil to get the bare URI with no prefix. + if digestValue.GitOID { + digestSet[digestValue] = string(hashfunc.Sum(nil)) + continue } + digestSet[digestValue] = string(HexEncode(hashfunc.Sum(nil))) } + return digestSet, nil } -func CalculateDigestSetFromBytes(data []byte, hashes []crypto.Hash) (DigestSet, error) { +func CalculateDigestSetFromBytes(data []byte, hashes []DigestValue) (DigestSet, error) { return CalculateDigestSet(bytes.NewReader(data), hashes) } -func CalculateDigestSetFromFile(path string, hashes []crypto.Hash) (DigestSet, error) { +func CalculateDigestSetFromFile(path string, hashes []DigestValue) (DigestSet, error) { file, err := os.Open(path) if err != nil { return DigestSet{}, err diff --git a/cryptoutil/gitoid.go b/cryptoutil/gitoid.go new file mode 100644 index 00000000..bf224161 --- /dev/null +++ b/cryptoutil/gitoid.go @@ -0,0 +1,71 @@ +package cryptoutil + +import ( + "bytes" + "crypto" + "encoding/hex" + "fmt" + + "github.com/edwarnicke/gitoid" +) + +// gitoidHasher implements io.Writer so we can generate gitoids with our CalculateDigestSet function. 
+// CalculateDigestSet takes in an io.Reader pointing to some data we want to hash, and writes it to a +// MultiWriter that forwards it to writers for each hash we wish to calculate. +// This is a bit hacky -- it maintains an internal buffer and then when asked for the Sum, it calculates +// the gitoid. We may be able to contribute to the gitoid library to make this smoother +type gitoidHasher struct { + buf *bytes.Buffer + hash crypto.Hash +} + +// Write implements the io.Writer interface, and writes to the internal buffer +func (gh *gitoidHasher) Write(p []byte) (n int, err error) { + return gh.buf.Write(p) +} + +// Sum appends the gitoid URI of the buffered data to b and returns the resulting slice (an empty slice on error). +// It does not change the underlying hash state: the buffer is read through a separate reader so it is not drained. +func (gh *gitoidHasher) Sum(b []byte) []byte { + opts := []gitoid.Option{} + if gh.hash == crypto.SHA256 { + opts = append(opts, gitoid.WithSha256()) + } + + g, err := gitoid.New(bytes.NewReader(gh.buf.Bytes()), opts...) + if err != nil { + return []byte{} + } + + return append(b, []byte(g.URI())...) +} + +// Reset resets the Hash to its initial state. +func (gh *gitoidHasher) Reset() { + gh.buf = &bytes.Buffer{} +} + +// Size returns the number of bytes Sum will return. +func (gh *gitoidHasher) Size() int { + hashName, err := HashToString(gh.hash) + if err != nil { + return 0 + } + + // this is somewhat fragile and knows too much about the internals of the gitoid code... + // we're assuming that the default gitoid content type will remain BLOB, and that our + // string representations of hash functions will remain consistent with their... + // and that the URI format will remain consistent. + // this should probably be changed, and this entire thing could maybe be upstreamed to the + // gitoid library. + return len(fmt.Sprintf("gitoid:%s:%s:", gitoid.BLOB, hashName)) + hex.EncodedLen(gh.hash.Size()) +} + +// BlockSize returns the hash's underlying block size. 
+// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (gh *gitoidHasher) BlockSize() int { + hf := gh.hash.New() + return hf.BlockSize() +}