diff --git a/internal/deb/extract.go b/internal/deb/extract.go index 98dba0b3..27f644f5 100644 --- a/internal/deb/extract.go +++ b/internal/deb/extract.go @@ -21,11 +21,56 @@ import ( "github.com/canonical/chisel/internal/strdist" ) +type ConsumeData func(reader io.Reader) error + +// DataCallback function is called when a tarball entry of a regular file is +// going to be extracted. +// +// The source and size parameters are set to the file's path in the tarball and +// the size of its content, respectively. When the returned ConsumeData function +// is not nil, it is called with the file's content. +// +// When the source path occurs in the tarball more than once, this function will +// be called for each occurrence. +// +// If either DataCallback or ConsumeData function returns a non-nil error, the +// Extract() function will fail with that error. +type DataCallback func(source string, size int64) (ConsumeData, error) + +// The CreateCallback function is called when a tarball entry is going to be +// extracted to a target path. +// +// The target, link, and mode parameters are set to the target's path, symbolic +// link target, and mode, respectively. The target's filesystem type can be +// determined from these parameters. If the link is not empty, the target is a +// symbolic link. Otherwise, if the target's path ends with /, it is a +// directory. Otherwise, it is a regular file. +// +// When the source parameter is not empty, the target is going to be extracted +// from a tarball entry with the source path. The function may be called more +// than once with the same source when the tarball entry is extracted to +// multiple different targets. +// +// Otherwise, the mode is 0755 and the target is going to be an implicitly +// created parent directory of another target, and when a directory entry with +// that path is later encountered in the tarball with a different mode, the +// function will be called again with the same target, source equal to the +// target, and the different mode. +// +// When the source path occurs in the tarball more than once, this function will +// be called for each target path for each occurrence. +// +// If CreateCallback function returns a non-nil error, the Extract() function +// will fail with that error. +type CreateCallback func(source, target, link string, mode fs.FileMode) error + type ExtractOptions struct { Package string TargetDir string Extract map[string][]ExtractInfo Globbed map[string][]string + OnData DataCallback + OnCreate CreateCallback } type ExtractInfo struct { @@ -209,14 +254,21 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { return nil } info := dirInfos[dir] + source := dir if info.created { return nil } else if info.mode == 0 { info.mode = fs.ModeDir | 0755 + source = "" } if err := createParents(dir); err != nil { return err } + if options.OnCreate != nil { + if err := options.OnCreate(source, dir, "", info.mode); err != nil { + return err + } + } create := fsutil.CreateOptions{ Path: filepath.Join(options.TargetDir, dir), Mode: info.mode, @@ -229,27 +281,37 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { return nil } - var contentCache []byte - var contentIsCached = len(extractInfos) > 1 && sourceMode.IsRegular() && globPath == "" - if contentIsCached { - // Read and cache the content so it may be reused. - // As an alternative, to avoid having an entire file in - // memory at once this logic might open the first file - // written and copy it every time. For now, the choice - // is speed over memory efficiency. - data, err := io.ReadAll(tarReader) - if err != nil { - return err + getReader := func() io.Reader { return tarReader } + + if sourceMode.IsRegular() { + var consumeData ConsumeData + if options.OnData != nil { + var err error + if consumeData, err = options.OnData(sourcePath, tarHeader.Size); err != nil { + return err + } + } + if consumeData != nil || (len(extractInfos) > 1 && globPath == "") { + // Read and cache the content so it may be reused. + // As an alternative, to avoid having an entire file in + // memory at once this logic might open the first file + // written and copy it every time. For now, the choice + // is speed over memory efficiency. + data, err := io.ReadAll(tarReader) + if err != nil { + return err + } + getReader = func() io.Reader { return bytes.NewReader(data) } + } + if consumeData != nil { + if err := consumeData(getReader()); err != nil { + return err + } } - contentCache = data } - var pathReader io.Reader = tarReader origMode := tarHeader.Mode for _, extractInfo := range extractInfos { - if contentIsCached { - pathReader = bytes.NewReader(contentCache) - } var targetPath string if globPath == "" { targetPath = extractInfo.Path @@ -264,10 +326,15 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { tarHeader.Mode = int64(extractInfo.Mode) } fsMode := tarHeader.FileInfo().Mode() + if options.OnCreate != nil { + if err := options.OnCreate(sourcePath, targetPath, tarHeader.Linkname, fsMode); err != nil { + return err + } + } err := fsutil.Create(&fsutil.CreateOptions{ Path: filepath.Join(options.TargetDir, targetPath), Mode: fsMode, - Data: pathReader, + Data: getReader(), Link: tarHeader.Linkname, }) if err != nil { diff --git a/internal/deb/extract_test.go b/internal/deb/extract_test.go index 47411bda..736c44b1 100644 --- a/internal/deb/extract_test.go +++ b/internal/deb/extract_test.go @@ -2,6 +2,9 @@ package deb_test import ( "bytes" + "io" + "io/fs" + "io/ioutil" . "gopkg.in/check.v1" @@ -470,3 +473,229 @@ func (s *S) TestExtract(c *C) { c.Assert(result, DeepEquals, test.result) } } + +type callbackTest struct { + summary string + pkgdata []byte + extract map[string][]deb.ExtractInfo + callbacks [][]any + noConsume bool + noData bool +} + +var callbackTests = []callbackTest{{ + summary: "Trivial", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + DIR(0701, "./a/"), + DIR(0702, "./a/b/"), + REG(0601, "./a/b/c", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"data", "/a/b/c", 0, []byte{}}, + {"create", "/a/b/c", "/a/b/c", "", 0601}, + }, +}, { + summary: "Data", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + DIR(0701, "./a/"), + REG(0601, "./a/b", "foo"), + REG(0602, "./a/c", "bar"), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"data", "/a/b", 3, []byte("foo")}, + {"create", "/a/b", "/a/b", "", 0601}, + {"data", "/a/c", 3, []byte("bar")}, + {"create", "/a/c", "/a/c", "", 0602}, + }, +}, { + summary: "Symlinks", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + REG(0601, "./a", ""), + LNK(0777, "./b", "/a"), + LNK(0777, "./c", "/d"), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "/a", "/a", "", 0601}, + {"create", "/b", "/b", "/a", 0777}, + {"create", "/c", "/c", "/d", 0777}, + }, +}, { + summary: "Simple copied paths", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + REG(0601, "./a", ""), + REG(0602, "./b", ""), + DIR(0701, "./c/"), + REG(0603, "./c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a": []deb.ExtractInfo{{Path: "/a"}}, + "/c/d": []deb.ExtractInfo{{Path: "/b"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "/a", "/a", "", 0601}, + {"create", "/c/d", "/b", "", 0603}, + }, +}, { + summary: "Parent directories", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + DIR(0701, "./a/"), + DIR(0702, "./a/b/"), + DIR(0703, "./a/b/c/"), + REG(0601, "./a/b/c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/c/": []deb.ExtractInfo{{Path: "/a/b/c/"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + }, +}, { + summary: "Parent directories with globs", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + DIR(0701, "./a/"), + DIR(0702, "./a/b/"), + DIR(0703, "./a/b/c/"), + REG(0601, "./a/b/c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/*/": []deb.ExtractInfo{{Path: "/a/b/*/"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + }, +}, { + summary: "Parent directories out of order", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + REG(0601, "./a/b/c/d", ""), + DIR(0703, "./a/b/c/"), + DIR(0702, "./a/b/"), + DIR(0701, "./a/"), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/*/": []deb.ExtractInfo{{Path: "/a/b/*/"}}, + }, + callbacks: [][]any{ + {"create", "", "/a/", "", 0755}, + {"create", "", "/a/b/", "", 0755}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/", "/a/", "", 0701}, + }, +}, { + summary: "Parent directories with early copy path", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + DIR(0701, "./a/"), + REG(0601, "./a/b", ""), + DIR(0702, "./c/"), + REG(0602, "./c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b": []deb.ExtractInfo{{Path: "/c/d"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "", "/c/", "", 0755}, + {"create", "/a/b", "/c/d", "", 0601}, + {"create", "/c/", "/c/", "", 0702}, + }, +}, { + summary: "Same file twice with different content", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + REG(0601, "./a", "foo"), + REG(0602, "./b", "bar"), + REG(0603, "./a", "baz"), + }), + extract: map[string][]deb.ExtractInfo{ + "/*": []deb.ExtractInfo{{Path: "/*"}}, + }, + callbacks: [][]any{ + {"data", "/a", 3, []byte("foo")}, + {"create", "/a", "/a", "", 0601}, + {"data", "/b", 3, []byte("bar")}, + {"create", "/b", "/b", "", 0602}, + {"data", "/a", 3, []byte("baz")}, + {"create", "/a", "/a", "", 0603}, + }, +}, { + summary: "Source with multiple targets", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + REG(0601, "./a", "aaa"), + REG(0602, "./b", "bu bu bu"), + }), + extract: map[string][]deb.ExtractInfo{ + "/a": []deb.ExtractInfo{{Path: "/b"}}, + "/b": []deb.ExtractInfo{ + {Path: "/c", Mode: 0603}, + {Path: "/d"}, + }, + }, + callbacks: [][]any{ + {"data", "/a", 3, []byte("aaa")}, + {"create", "/a", "/b", "", 0601}, + {"data", "/b", 8, []byte("bu bu bu")}, + {"create", "/b", "/c", "", 0603}, + {"create", "/b", "/d", "", 0602}, + }, +}} + +func (s *S) TestExtractCallbacks(c *C) { + for _, test := range callbackTests { + c.Logf("Test: %s", test.summary) + dir := c.MkDir() + var callbacks [][]any + onData := func(source string, size int64) (deb.ConsumeData, error) { + if test.noConsume { + args := []any{"data", source, int(size), nil} + callbacks = append(callbacks, args) + return nil, nil + } + consume := func(reader io.Reader) error { + data, err := ioutil.ReadAll(reader) + if err != nil { + return err + } + args := []any{"data", source, int(size), data} + callbacks = append(callbacks, args) + return nil + } + return consume, nil + } + if test.noData { + onData = nil + } + onCreate := func(source, target, link string, mode fs.FileMode) error { + modeInt := int(07777 & mode) + args := []any{"create", source, target, link, modeInt} + callbacks = append(callbacks, args) + return nil + } + options := deb.ExtractOptions{ + Package: "test", + TargetDir: dir, + Extract: test.extract, + OnData: onData, + OnCreate: onCreate, + } + err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options) + c.Assert(err, IsNil) + c.Assert(callbacks, DeepEquals, test.callbacks) + } +}