slicer: Track and record paths in DB
This commit completes the Chisel DB implementation by recording path
values in the DB.

Recording paths is trickier because

  1. we don't know which paths to record until all packages are sliced
     because of glob patterns,
  2. we need to record the path-to-slices relationship even for
     implicitly created parent directories, and
  3. we want to compute digests only once for source paths with multiple
     target paths.

This commit introduces the pathTracker interface and implementation that
tackles the problems above. Its methods are called in various places in
slicer and as callbacks in deb/extract to update the state of tracked
paths. At a high level, these are the steps taken to track paths:

  1. The callback interface in deb/extract is utilized to track created
     paths, their attributes, and their content digests.
  2. The same information is tracked for non-extracted content paths.
  3. Until-paths are untracked.
  4. Slices are assigned to both requested paths and their implicit
     parent directories.
  5. Digests of mutated files are updated.

After that, the tracked paths are recorded in the DB.
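
Schematically, the call flow in slicer.Run is roughly the following (a
simplified sketch with placeholder variables, not the literal code):

  pathTrack := newPathTracker()
  // while planning extraction, register requested paths and globs
  pathTrack.addSlicePath(slice, targetPath)
  pathTrack.addSliceGlob(slice, globPattern)
  // deb.Extract reports extracted content and created paths through
  // the OnData and OnCreate callbacks in deb.ExtractOptions
  // non-extracted content (text, dirs, symlinks) is reported directly
  pathTrack.addTarget(targetPath, linkTarget, mode, data)
  // mutation scripts and until-path removals adjust the tracked state
  pathTrack.markMutated(path)
  pathTrack.removeTarget(untilPath)
  // reconcile attributes, slices and digests, then record in the DB
  pathTrack.updateTargets(targetDir)
  pathTrack.updateDB(addToDB)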

Subjectively, the performance impact is minimal.

Note that copyright files are not owned by any slice. This is a bug that
will be fixed in future commits, which is why the nil Slices attribute
was kept in the expected DB objects in test cases.
woky committed Jun 22, 2023
1 parent 08cfdd3 commit c56fd2f
Showing 3 changed files with 1,681 additions and 13 deletions.
212 changes: 212 additions & 0 deletions internal/slicer/pathtrack.go
@@ -0,0 +1,212 @@
package slicer

import (
	"crypto/sha256"
	"fmt"
	"io"
	"io/fs"
	"io/ioutil"
	"path/filepath"

	"github.com/canonical/chisel/internal/db"
	"github.com/canonical/chisel/internal/deb"
	"github.com/canonical/chisel/internal/fsutil"
	"github.com/canonical/chisel/internal/lib/sortedset"
	"github.com/canonical/chisel/internal/strdist"
)

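// pathTracker accumulates information about paths created in the target
// directory so that path entries and their slice ownership can be recorded
// in the Chisel DB once all packages have been sliced.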
type pathTracker interface {
	// initialization
	addSlicePath(slice, path string)
	addSliceGlob(slice, glob string)
	// extraction
	onData(source string, size int64) (deb.ConsumeData, error)
	onCreate(source, target, link string, mode fs.FileMode) error
	// non-extracted content
	addTarget(target, link string, mode fs.FileMode, data []byte)
	// mutation scripts
	markMutated(target string)
	// until paths
	removeTarget(target string)
	// reconcile changes above
	updateTargets(root string) error
	// write results
	updateDB(addToDB AddToDB) error
}

type contentInfo struct {
	size   int64
	digest *[sha256.Size]byte
}

func computeDigest(data []byte) *[sha256.Size]byte {
	digest := sha256.Sum256(data)
	return &digest
}

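// pathTrackCtx holds the state of tracked paths: which slices requested
// which paths and globs, which source path each target was extracted from,
// content sizes and digests keyed by source path, the db.Path entries being
// built per target, and which targets were touched by mutation scripts.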
type pathTrackCtx struct {
	pathSlices     map[string]sortedset.String
	globSlices     map[string]sortedset.String
	targetToSource map[string]string
	sourceContent  map[string]contentInfo
	targets        map[string]*db.Path
	mutatedTargets map[string]bool
}

var _ pathTracker = (*pathTrackCtx)(nil)

func newPathTracker() pathTracker {
	return &pathTrackCtx{
		pathSlices:     make(map[string]sortedset.String),
		globSlices:     make(map[string]sortedset.String),
		targetToSource: make(map[string]string),
		sourceContent:  make(map[string]contentInfo),
		targets:        make(map[string]*db.Path),
		mutatedTargets: make(map[string]bool),
	}
}

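// addSlicePath records that the given slice requested the given target path.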
func (ctx *pathTrackCtx) addSlicePath(slice, path string) {
	ctx.pathSlices[path] = ctx.pathSlices[path].AddMany(slice)
}

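// addSliceGlob records that the given slice requested the given glob
// pattern. Matching paths are only known after extraction and are resolved
// in completeTarget.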
func (ctx *pathTrackCtx) addSliceGlob(slice, glob string) {
	ctx.globSlices[glob] = ctx.globSlices[glob].AddMany(slice)
}

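// onData is the extraction data callback. It returns a consumer that reads
// the extracted content and records its size and SHA-256 digest keyed by
// source path, so the digest is computed only once even when one source
// path maps to several target paths.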
func (ctx *pathTrackCtx) onData(source string, size int64) (deb.ConsumeData, error) {
	// XXX: We should return nil if the source matches one of the
	// until-paths. But that would require some additional expensive
	// tracking. Until-paths are now untracked by removeTarget() called
	// during their removal from the output directory.
	consume := func(reader io.Reader) error {
		data, err := ioutil.ReadAll(reader)
		if err != nil {
			return err
		}
		digest := computeDigest(data)
		ctx.sourceContent[source] = contentInfo{size, digest}
		return nil
	}
	return consume, nil
}

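// onCreate is the extraction create callback. It records the created target
// path with its mode and link, and remembers which source path produced it.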
func (ctx *pathTrackCtx) onCreate(source, target, link string, mode fs.FileMode) error {
	info := db.Path{
		Path: target,
		Mode: mode,
		Link: link,
	}
	ctx.targets[target] = &info
	ctx.targetToSource[target] = source
	return nil
}

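// addTarget records a path written by the slicer itself rather than
// extracted from a package, together with any implicitly created parent
// directories.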
func (ctx *pathTrackCtx) addTarget(target, link string, mode fs.FileMode, data []byte) {
	info := db.Path{
		Path: target,
		Mode: mode,
		Link: link,
	}
	if data != nil {
		info.Size = int64(len(data))
		info.SHA256 = computeDigest(data)
	}
	ctx.targets[target] = &info
	// add parents
	for parent := fsutil.Dir(target); parent != "/"; parent = fsutil.Dir(parent) {
		if _, ok := ctx.targets[parent]; ok {
			break
		}
		ctx.targets[parent] = &db.Path{
			Path: parent,
			Mode: fs.ModeDir | 0755,
		}
	}
}

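// markMutated flags a target written by a mutation script so that
// refreshTarget can record its final digest.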
func (ctx *pathTrackCtx) markMutated(target string) {
	ctx.mutatedTargets[target] = true
}

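// removeTarget drops a path from tracking; it is called when until-paths
// are removed from the target directory.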
func (ctx *pathTrackCtx) removeTarget(target string) {
	delete(ctx.targets, target)
}

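// completeTarget reduces the recorded mode to its permission bits, copies
// the size and digest gathered by onData, and assigns slices to the path
// and its parent directories, propagating upwards only the slices that are
// newly added at each level.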
func (ctx *pathTrackCtx) completeTarget(info *db.Path) {
	// keep only permission bits
	info.Mode = info.Mode & 07777

	// copy content info from OnData callbacks
	source := ctx.targetToSource[info.Path]
	if content, ok := ctx.sourceContent[source]; ok {
		info.Size = content.size
		info.SHA256 = content.digest
	}

	// assign slices
	slices := ctx.pathSlices[info.Path]
	for glob, globSlices := range ctx.globSlices {
		if strdist.GlobPath(glob, info.Path) {
			slices = slices.AddMany(globSlices...)
		}
	}

	// assign slices to parents
	path := info.Path
	for len(slices) > 0 && path != "/" {
		newSlices := []string{}
		for _, sl := range slices {
			if tmp, ok := sortedset.String(info.Slices).AddOne(sl); ok {
				info.Slices = tmp
				newSlices = append(newSlices, sl)
			}
		}
		slices = newSlices
		path = fsutil.Dir(path)
		info = ctx.targets[path]
	}
}

// set final digest on mutated files
func (ctx *pathTrackCtx) refreshTarget(info *db.Path, root string) error {
	if !ctx.mutatedTargets[info.Path] || info.SHA256 == nil {
		// not mutated or not a regular file
		return nil
	}
	local := filepath.Join(root, info.Path)
	data, err := ioutil.ReadFile(local)
	if err != nil {
		return err
	}
	finalDigest := computeDigest(data)
	if *finalDigest != *info.SHA256 {
		info.FinalSHA256 = finalDigest
	}
	return nil
}

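// updateTargets finalizes all tracked paths, completing their attributes
// and slice assignments and refreshing digests of mutated files under root.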
func (ctx *pathTrackCtx) updateTargets(root string) (err error) {
	for _, info := range ctx.targets {
		ctx.completeTarget(info)
		if err = ctx.refreshTarget(info, root); err != nil {
			break
		}
	}
	return
}

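// updateDB writes one db.Path entry per tracked path and one db.Content
// entry per (slice, path) pair through the addToDB callback.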
func (ctx *pathTrackCtx) updateDB(addToDB AddToDB) error {
	for _, info := range ctx.targets {
		if err := addToDB(*info); err != nil {
			return fmt.Errorf("cannot write path to db: %w", err)
		}
		for _, sl := range info.Slices {
			content := db.Content{sl, info.Path}
			if err := addToDB(content); err != nil {
				return fmt.Errorf("cannot write content to db: %w", err)
			}
		}
	}
	return nil
}
56 changes: 43 additions & 13 deletions internal/slicer/slicer.go
@@ -38,6 +38,7 @@ func Run(options *RunOptions) error {
	if addToDB == nil {
		addToDB = func(value any) error { return nil }
	}
+	pathTrack := newPathTracker()

	knownPaths["/"] = true

@@ -140,6 +141,11 @@ func Run(options *RunOptions) error {
					})
				}
			}
+			if pathInfo.Kind == setup.GlobPath {
+				pathTrack.addSliceGlob(pkgSlice, targetPath)
+			} else {
+				pathTrack.addSlicePath(pkgSlice, targetPath)
+			}
		}
		if !hasCopyright {
			extractPackage[copyrightPath] = append(extractPackage[copyrightPath], deb.ExtractInfo{
@@ -176,6 +182,8 @@ func Run(options *RunOptions) error {
			Extract: extract[slice.Package],
			TargetDir: targetDir,
			Globbed: globbedPaths,
+			OnData: pathTrack.onData,
+			OnCreate: pathTrack.onCreate,
		})
		reader.Close()
		packages[slice.Package] = nil
@@ -202,7 +210,7 @@ func Run(options *RunOptions) error {
			continue
		}
		done[targetPath] = true
-		targetPath = filepath.Join(targetDir, targetPath)
+		localPath := filepath.Join(targetDir, targetPath)
		targetMode := pathInfo.Mode
		if targetMode == 0 {
			if pathInfo.Kind == setup.DirPath {
@@ -214,7 +222,7 @@ func Run(options *RunOptions) error {

		// Leverage tar handling of mode bits.
		tarHeader := tar.Header{Mode: int64(targetMode)}
-		var fileContent io.Reader
+		var fileContent *bytes.Buffer
		var linkTarget string
		switch pathInfo.Kind {
		case setup.TextPath:
@@ -228,10 +236,17 @@ func Run(options *RunOptions) error {
		default:
			return fmt.Errorf("internal error: cannot extract path of kind %q", pathInfo.Kind)
		}
+		fsMode := tarHeader.FileInfo().Mode()

+		var data []byte
+		if fileContent != nil {
+			data = fileContent.Bytes()
+		}
+		pathTrack.addTarget(targetPath, linkTarget, fsMode, data)
+
		err := fsutil.Create(&fsutil.CreateOptions{
-			Path: targetPath,
-			Mode: tarHeader.FileInfo().Mode(),
+			Path: localPath,
+			Mode: fsMode,
			Data: fileContent,
			Link: linkTarget,
			Dirs: true,
@@ -248,6 +263,7 @@ func Run(options *RunOptions) error {
		if !pathInfos[path].Mutable {
			return fmt.Errorf("cannot write file which is not mutable: %s", path)
		}
+		pathTrack.markMutated(path)
		return nil
	}
	checkRead := func(path string) error {
@@ -292,6 +308,10 @@ func Run(options *RunOptions) error {
		}
	}

+	if err := pathTrack.updateTargets(targetDir); err != nil {
+		return err
+	}
+
	var untilDirs []string
	for targetPath, pathInfo := range pathInfos {
		if pathInfo.Until == setup.UntilMutate {
@@ -302,28 +322,38 @@ func Run(options *RunOptions) error {
				targetPaths = []string{targetPath}
			}
			for _, targetPath := range targetPaths {
+				if strings.HasSuffix(targetPath, "/") {
+					untilDirs = append(untilDirs, targetPath)
+					continue
+				}
				realPath, err := content.RealPath(targetPath, scripts.CheckRead)
				if err == nil {
-					if strings.HasSuffix(targetPath, "/") {
-						untilDirs = append(untilDirs, realPath)
-					} else {
-						err = os.Remove(realPath)
-					}
+					err = os.Remove(realPath)
				}
				if err != nil {
					return fmt.Errorf("cannot perform 'until' removal: %w", err)
				}
+				pathTrack.removeTarget(targetPath)
			}
		}
	}
-	for _, realPath := range untilDirs {
-		err := os.Remove(realPath)
-		// The non-empty directory error is caught by IsExist as well.
-		if err != nil && !os.IsExist(err) {
+	for _, targetPath := range untilDirs {
+		realPath, err := content.RealPath(targetPath, scripts.CheckRead)
+		if err == nil {
+			err = os.Remove(realPath)
+		}
+		if err == nil {
+			pathTrack.removeTarget(targetPath)
+		} else if !os.IsExist(err) {
+			// The non-empty directory error is caught by IsExist as well.
			return fmt.Errorf("cannot perform 'until' removal: %#v", err)
		}
	}

+	if err := pathTrack.updateDB(addToDB); err != nil {
+		return err
+	}
+
	return nil
}
