extract: Add callbacks
This commit adds support for callbacks from deb/extract.go. These
callbacks will be used by the forthcoming Chisel DB implementation to
record metadata about installed paths.

The reason for adding callbacks instead of handling path recording in
deb/extract is to keep the concerns separated between internal packages.
In the Chisel DB we need to record the relationship between paths and
slices, as well as the digests of regular files, but deb/extract doesn't
know about slices. slicer, on the other hand, does know about slices, so
it is the proper place to drive Chisel DB creation. But slicer needs to
cooperate with deb/extract
because it doesn't know which paths will be matched by globs and because
it needs the content of matched regular files. The callbacks provide a
mechanism for slicer to accomplish this.
woky committed Oct 9, 2023
1 parent c48fab9 commit b653917
Showing 2 changed files with 312 additions and 17 deletions.
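
As an illustration only (not part of this commit), here is a minimal sketch of how a caller such as slicer might wire the new callbacks to record the path-to-slice relationship and the digests of regular files mentioned in the commit message. The pathInfo type, the function and package names, and the single sliceName are hypothetical; only deb.Extract, deb.ExtractOptions, and the callback types come from this change, and internal/deb is importable only from within the chisel module itself.

package example

import (
	"crypto/sha256"
	"io"
	"io/fs"

	"github.com/canonical/chisel/internal/deb"
)

// pathInfo is a hypothetical record of what was extracted to one target path.
type pathInfo struct {
	slices []string
	mode   fs.FileMode
	link   string
	sha256 []byte
}

// extractWithRecording extracts a package and records, for every created
// target, the owning slice, mode, link target and, for regular files, the
// SHA-256 digest of the extracted content.
func extractWithRecording(pkg io.Reader, targetDir, sliceName string, extract map[string][]deb.ExtractInfo) (map[string]*pathInfo, error) {
	recorded := make(map[string]*pathInfo)
	digests := make(map[string][]byte)

	// OnData runs when a regular file entry is about to be extracted; the
	// returned ConsumeData function receives the file's content.
	onData := func(source string, size int64) (deb.ConsumeData, error) {
		consume := func(reader io.Reader) error {
			h := sha256.New()
			if _, err := io.Copy(h, reader); err != nil {
				return err
			}
			digests[source] = h.Sum(nil)
			return nil
		}
		return consume, nil
	}

	// OnCreate runs for every created target, including implicitly created
	// parent directories, for which source is empty.
	onCreate := func(source, target, link string, mode fs.FileMode) error {
		recorded[target] = &pathInfo{
			slices: []string{sliceName},
			mode:   mode,
			link:   link,
			sha256: digests[source],
		}
		return nil
	}

	err := deb.Extract(pkg, &deb.ExtractOptions{
		Package:   "example-pkg",
		TargetDir: targetDir,
		Extract:   extract,
		OnData:    onData,
		OnCreate:  onCreate,
	})
	if err != nil {
		return nil, err
	}
	return recorded, nil
}

Because the data callback for a source runs before the create callbacks for its targets (as the tests below demonstrate), the digest is already available when each target is recorded.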
101 changes: 84 additions & 17 deletions internal/deb/extract.go
@@ -21,11 +21,56 @@ import (
"github.com/canonical/chisel/internal/strdist"
)

type ConsumeData func(reader io.Reader) error

// The DataCallback function is called when a tarball entry of a regular file is
// going to be extracted.
//
// The source and size parameters are set to the file's path in the tarball and
// the size of its content, respectively. When the returned ConsumeData function
// is not nil, it is called with the file's content.
//
// When the source path occurs in the tarball more than once, this function will
// be called for each occurrence.
//
// If either the DataCallback or the ConsumeData function returns a non-nil error, the
// Extract() function will fail with that error.
type DataCallback func(source string, size int64) (ConsumeData, error)

// The CreateCallback function is called when a tarball entry is going to be
// extracted to a target path.
//
// The target, link, and mode parameters are set to the target's path, symbolic
// link target, and mode, respectively. The target's filesystem type can be
// determined from these parameters. If the link is not empty, the target is a
// symbolic link. Otherwise, if the target's path ends with /, it is a
// directory. Otherwise, it is a regular file.
//
// When the source parameter is not empty, the target is going to be extracted
// from a tarball entry with the source path. The function may be called more
// than once with the same source when the tarball entry is extracted to
// multiple different targets.
//
// Otherwise, the mode is 0755 and the target is going to be an implicitly
// created parent directory of another target, and when a directory entry with
// that path is later encountered in the tarball with a different mode, the
// function will be called again with the same target, source equal to the
// target, and the different mode.
//
// When the source path occurs in the tarball more than once, this function will
// be called for each target path for each occurrence.
//
// If the CreateCallback function returns a non-nil error, the Extract() function
// will fail with that error.
type CreateCallback func(source, target, link string, mode fs.FileMode) error

type ExtractOptions struct {
	Package   string
	TargetDir string
	Extract   map[string][]ExtractInfo
	Globbed   map[string][]string
	OnData    DataCallback
	OnCreate  CreateCallback
}

type ExtractInfo struct {
@@ -209,14 +254,21 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
return nil
}
info := dirInfos[dir]
source := dir
if info.created {
return nil
} else if info.mode == 0 {
info.mode = fs.ModeDir | 0755
source = ""
}
if err := createParents(dir); err != nil {
return err
}
if options.OnCreate != nil {
if err := options.OnCreate(source, dir, "", info.mode); err != nil {
return err
}
}
create := fsutil.CreateOptions{
Path: filepath.Join(options.TargetDir, dir),
Mode: info.mode,
@@ -229,27 +281,37 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
return nil
}

var contentCache []byte
var contentIsCached = len(extractInfos) > 1 && sourceMode.IsRegular() && globPath == ""
if contentIsCached {
// Read and cache the content so it may be reused.
// As an alternative, to avoid having an entire file in
// memory at once this logic might open the first file
// written and copy it every time. For now, the choice
// is speed over memory efficiency.
data, err := io.ReadAll(tarReader)
if err != nil {
return err
getReader := func() io.Reader { return tarReader }

if sourceMode.IsRegular() {
var consumeData ConsumeData
if options.OnData != nil {
var err error
if consumeData, err = options.OnData(sourcePath, tarHeader.Size); err != nil {
return err
}
}
if consumeData != nil || (len(extractInfos) > 1 && globPath == "") {
// Read and cache the content so it may be reused.
// As an alternative, to avoid having an entire file in
// memory at once this logic might open the first file
// written and copy it every time. For now, the choice
// is speed over memory efficiency.
data, err := io.ReadAll(tarReader)
if err != nil {
return err
}
getReader = func() io.Reader { return bytes.NewReader(data) }
}
if consumeData != nil {
if err := consumeData(getReader()); err != nil {
return err
}
}
contentCache = data
}

var pathReader io.Reader = tarReader
origMode := tarHeader.Mode
for _, extractInfo := range extractInfos {
if contentIsCached {
pathReader = bytes.NewReader(contentCache)
}
var targetPath string
if globPath == "" {
targetPath = extractInfo.Path
@@ -264,10 +326,15 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
tarHeader.Mode = int64(extractInfo.Mode)
}
fsMode := tarHeader.FileInfo().Mode()
if options.OnCreate != nil {
if err := options.OnCreate(sourcePath, targetPath, tarHeader.Linkname, fsMode); err != nil {
return err
}
}
err := fsutil.Create(&fsutil.CreateOptions{
Path: filepath.Join(options.TargetDir, targetPath),
Mode: fsMode,
Data: pathReader,
Data: getReader(),
Link: tarHeader.Linkname,
})
if err != nil {
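
The DataCallback/ConsumeData contract documented above also lets a caller ask for content selectively. A small sketch, not part of this commit: the selectiveOnData name, the /etc/ filter, and the printing are illustrative. Returning a nil ConsumeData tells Extract that the content is not wanted, so the entry does not have to be buffered in memory on the callback's behalf.

package example

import (
	"fmt"
	"io"
	"strings"

	"github.com/canonical/chisel/internal/deb"
)

// selectiveOnData asks for content only for sources under /etc/. For every
// other source it returns a nil ConsumeData, so no content is delivered.
func selectiveOnData(source string, size int64) (deb.ConsumeData, error) {
	if !strings.HasPrefix(source, "/etc/") {
		return nil, nil
	}
	return func(reader io.Reader) error {
		data, err := io.ReadAll(reader)
		if err != nil {
			return err
		}
		fmt.Printf("consumed %d of %d bytes from %s\n", len(data), size, source)
		return nil
	}, nil
}

// selectiveOnData satisfies the DataCallback signature.
var _ deb.DataCallback = selectiveOnData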
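
The CreateCallback parameters alone are enough to classify each created target, following the rules in the comment above: a non-empty link means a symbolic link, a trailing slash means a directory, and anything else is a regular file. Another small sketch, not part of this commit; the classifyOnCreate name and the logging are illustrative only.

package example

import (
	"fmt"
	"io/fs"
	"strings"
)

// classifyOnCreate reports what kind of target is being created.
func classifyOnCreate(source, target, link string, mode fs.FileMode) error {
	switch {
	case link != "":
		fmt.Printf("symlink %s -> %s\n", target, link)
	case strings.HasSuffix(target, "/"):
		if source == "" {
			// Implicitly created parent directory: mode is 0755, and a later
			// tarball entry for this path may trigger another call with the
			// actual mode.
			fmt.Printf("implicit dir %s\n", target)
		} else {
			fmt.Printf("dir %s %v\n", target, mode)
		}
	default:
		fmt.Printf("file %s from %s %v\n", target, source, mode)
	}
	return nil
}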
228 changes: 228 additions & 0 deletions internal/deb/extract_test.go
@@ -2,6 +2,8 @@ package deb_test

import (
"bytes"
"io"
"io/fs"

. "gopkg.in/check.v1"

@@ -470,3 +472,229 @@ func (s *S) TestExtract(c *C) {
		c.Assert(result, DeepEquals, test.result)
	}
}

type callbackTest struct {
	summary   string
	pkgdata   []byte
	extract   map[string][]deb.ExtractInfo
	callbacks [][]any
	noConsume bool
	noData    bool
}

var callbackTests = []callbackTest{{
	summary: "Trivial",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		DIR(0701, "./a/"),
		DIR(0702, "./a/b/"),
		REG(0601, "./a/b/c", ""),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/**": []deb.ExtractInfo{{Path: "/**"}},
	},
	callbacks: [][]any{
		{"create", "/a/", "/a/", "", 0701},
		{"create", "/a/b/", "/a/b/", "", 0702},
		{"data", "/a/b/c", 0, []byte{}},
		{"create", "/a/b/c", "/a/b/c", "", 0601},
	},
}, {
	summary: "Data",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		DIR(0701, "./a/"),
		REG(0601, "./a/b", "foo"),
		REG(0602, "./a/c", "bar"),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/**": []deb.ExtractInfo{{Path: "/**"}},
	},
	callbacks: [][]any{
		{"create", "/a/", "/a/", "", 0701},
		{"data", "/a/b", 3, []byte("foo")},
		{"create", "/a/b", "/a/b", "", 0601},
		{"data", "/a/c", 3, []byte("bar")},
		{"create", "/a/c", "/a/c", "", 0602},
	},
}, {
	summary: "Symlinks",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		REG(0601, "./a", ""),
		LNK(0777, "./b", "/a"),
		LNK(0777, "./c", "/d"),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/**": []deb.ExtractInfo{{Path: "/**"}},
	},
	noData: true,
	callbacks: [][]any{
		{"create", "/a", "/a", "", 0601},
		{"create", "/b", "/b", "/a", 0777},
		{"create", "/c", "/c", "/d", 0777},
	},
}, {
	summary: "Simple copied paths",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		REG(0601, "./a", ""),
		REG(0602, "./b", ""),
		DIR(0701, "./c/"),
		REG(0603, "./c/d", ""),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/a":   []deb.ExtractInfo{{Path: "/a"}},
		"/c/d": []deb.ExtractInfo{{Path: "/b"}},
	},
	noData: true,
	callbacks: [][]any{
		{"create", "/a", "/a", "", 0601},
		{"create", "/c/d", "/b", "", 0603},
	},
}, {
	summary: "Parent directories",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		DIR(0701, "./a/"),
		DIR(0702, "./a/b/"),
		DIR(0703, "./a/b/c/"),
		REG(0601, "./a/b/c/d", ""),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/a/b/c/": []deb.ExtractInfo{{Path: "/a/b/c/"}},
	},
	callbacks: [][]any{
		{"create", "/a/", "/a/", "", 0701},
		{"create", "/a/b/", "/a/b/", "", 0702},
		{"create", "/a/b/c/", "/a/b/c/", "", 0703},
	},
}, {
	summary: "Parent directories with globs",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		DIR(0701, "./a/"),
		DIR(0702, "./a/b/"),
		DIR(0703, "./a/b/c/"),
		REG(0601, "./a/b/c/d", ""),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/a/b/*/": []deb.ExtractInfo{{Path: "/a/b/*/"}},
	},
	callbacks: [][]any{
		{"create", "/a/", "/a/", "", 0701},
		{"create", "/a/b/", "/a/b/", "", 0702},
		{"create", "/a/b/c/", "/a/b/c/", "", 0703},
	},
}, {
	summary: "Parent directories out of order",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		REG(0601, "./a/b/c/d", ""),
		DIR(0703, "./a/b/c/"),
		DIR(0702, "./a/b/"),
		DIR(0701, "./a/"),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/a/b/*/": []deb.ExtractInfo{{Path: "/a/b/*/"}},
	},
	callbacks: [][]any{
		{"create", "", "/a/", "", 0755},
		{"create", "", "/a/b/", "", 0755},
		{"create", "/a/b/c/", "/a/b/c/", "", 0703},
		{"create", "/a/b/", "/a/b/", "", 0702},
		{"create", "/a/", "/a/", "", 0701},
	},
}, {
	summary: "Parent directories with early copy path",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		DIR(0701, "./a/"),
		REG(0601, "./a/b", ""),
		DIR(0702, "./c/"),
		REG(0602, "./c/d", ""),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/a/b": []deb.ExtractInfo{{Path: "/c/d"}},
	},
	noData: true,
	callbacks: [][]any{
		{"create", "", "/c/", "", 0755},
		{"create", "/a/b", "/c/d", "", 0601},
		{"create", "/c/", "/c/", "", 0702},
	},
}, {
	summary: "Same file twice with different content",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		REG(0601, "./a", "foo"),
		REG(0602, "./b", "bar"),
		REG(0603, "./a", "baz"),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/*": []deb.ExtractInfo{{Path: "/*"}},
	},
	callbacks: [][]any{
		{"data", "/a", 3, []byte("foo")},
		{"create", "/a", "/a", "", 0601},
		{"data", "/b", 3, []byte("bar")},
		{"create", "/b", "/b", "", 0602},
		{"data", "/a", 3, []byte("baz")},
		{"create", "/a", "/a", "", 0603},
	},
}, {
	summary: "Source with multiple targets",
	pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
		REG(0601, "./a", "aaa"),
		REG(0602, "./b", "bu bu bu"),
	}),
	extract: map[string][]deb.ExtractInfo{
		"/a": []deb.ExtractInfo{{Path: "/b"}},
		"/b": []deb.ExtractInfo{
			{Path: "/c", Mode: 0603},
			{Path: "/d"},
		},
	},
	callbacks: [][]any{
		{"data", "/a", 3, []byte("aaa")},
		{"create", "/a", "/b", "", 0601},
		{"data", "/b", 8, []byte("bu bu bu")},
		{"create", "/b", "/c", "", 0603},
		{"create", "/b", "/d", "", 0602},
	},
}}

func (s *S) TestExtractCallbacks(c *C) {
	for _, test := range callbackTests {
		c.Logf("Test: %s", test.summary)
		dir := c.MkDir()
		var callbacks [][]any
		onData := func(source string, size int64) (deb.ConsumeData, error) {
			if test.noConsume {
				args := []any{"data", source, int(size), nil}
				callbacks = append(callbacks, args)
				return nil, nil
			}
			consume := func(reader io.Reader) error {
				data, err := io.ReadAll(reader)
				if err != nil {
					return err
				}
				args := []any{"data", source, int(size), data}
				callbacks = append(callbacks, args)
				return nil
			}
			return consume, nil
		}
		if test.noData {
			onData = nil
		}
		onCreate := func(source, target, link string, mode fs.FileMode) error {
			modeInt := int(07777 & mode)
			args := []any{"create", source, target, link, modeInt}
			callbacks = append(callbacks, args)
			return nil
		}
		options := deb.ExtractOptions{
			Package:   "test",
			TargetDir: dir,
			Extract:   test.extract,
			OnData:    onData,
			OnCreate:  onCreate,
		}
		err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options)
		c.Assert(err, IsNil)
		c.Assert(callbacks, DeepEquals, test.callbacks)
	}
}
