From 0d5d78ccf0b91055e1919831b29f6f6291249ccc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?=
Date: Sun, 18 Jun 2023 00:46:03 +0200
Subject: [PATCH 1/8] slicer/test: Support for custom packages

Currently we only test with the embedded base-files package. This
considerably limits what we can test. But since commit a3315e3
("testutil/pkgdata: Create deb programatically") we have the ability
to define our own custom package content.

Add support for testing with custom packages. These can be defined in
each test case per archive name. The content can be defined directly
in the test case definition with the help of testutil.MustMakeDeb().

The package content is wrapped in the testPackage structure. This
introduces one level of nesting in each test case package definition.
In the future, we will add package metadata to the package definition,
namely the version. So wrapping the package content in the structure
now, and later just adding a new field to it, will make the future
diff much smaller.
---
 internal/slicer/slicer_test.go | 112 ++++++++++++++++++++++++++++++---
 1 file changed, 104 insertions(+), 8 deletions(-)

diff --git a/internal/slicer/slicer_test.go b/internal/slicer/slicer_test.go
index aa9d0f51..471d64a1 100644
--- a/internal/slicer/slicer_test.go
+++ b/internal/slicer/slicer_test.go
@@ -16,9 +16,20 @@ import (
     "github.com/canonical/chisel/internal/testutil"
 )

+var (
+    Reg = testutil.Reg
+    Dir = testutil.Dir
+    Lnk = testutil.Lnk
+)
+
+type testPackage struct {
+    content []byte
+}
+
 type slicerTest struct {
     summary string
     arch    string
+    pkgs    map[string]map[string]testPackage
     release map[string]string
     slices  []setup.SliceKey
     hackopt func(c *C, opts *slicer.RunOptions)
@@ -515,6 +526,77 @@ var slicerTests = []slicerTest{{
         "/usr/bin/":      "dir 0755",
         "/usr/bin/hello": "file 0775 eaf29575",
     },
+}, {
+    summary: "Custom archives with custom packages",
+    pkgs: map[string]map[string]testPackage{
+        "leptons": {
+            "electron": testPackage{
+                content: testutil.MustMakeDeb([]testutil.TarEntry{
+                    Dir(0755, "./"),
+                    Dir(0755, "./mass/"),
+                    Reg(0644, "./mass/electron", "9.1093837015E−31 kg\n"),
+                    Dir(0755, "./usr/"),
+                    Dir(0755, "./usr/share/"),
+                    Dir(0755, "./usr/share/doc/"),
+                    Dir(0755, "./usr/share/doc/electron/"),
+                    Reg(0644, "./usr/share/doc/electron/copyright", ""),
+                }),
+            },
+        },
+        "hadrons": {
+            "proton": testPackage{
+                content: testutil.MustMakeDeb([]testutil.TarEntry{
+                    Dir(0755, "./"),
+                    Dir(0755, "./mass/"),
+                    Reg(0644, "./mass/proton", "1.67262192369E−27 kg\n"),
+                }),
+            },
+        },
+    },
+    release: map[string]string{
+        "chisel.yaml": `
+            format: chisel-v1
+            archives:
+                leptons:
+                    version: 1
+                    suites: [main]
+                    components: [main, universe]
+                    default: true
+                hadrons:
+                    version: 1
+                    suites: [main]
+                    components: [main]
+        `,
+        "slices/mydir/electron.yaml": `
+            package: electron
+            slices:
+                mass:
+                    contents:
+                        /mass/electron:
+        `,
+        "slices/mydir/proton.yaml": `
+            package: proton
+            archive: hadrons
+            slices:
+                mass:
+                    contents:
+                        /mass/proton:
+        `,
+    },
+    slices: []setup.SliceKey{
+        {"electron", "mass"},
+        {"proton", "mass"},
+    },
+    result: map[string]string{
+        "/mass/":                            "dir 0755",
+        "/mass/electron":                    "file 0644 a1258e30",
+        "/mass/proton":                      "file 0644 a2390d10",
+        "/usr/":                             "dir 0755",
+        "/usr/share/":                       "dir 0755",
+        "/usr/share/doc/":                   "dir 0755",
+        "/usr/share/doc/electron/":          "dir 0755",
+        "/usr/share/doc/electron/copyright": "file 0644 empty",
+    },
 }}

 const defaultChiselYaml = `
     format: chisel-v1
     archives:
         ubuntu:
             version: 22.04
             components: [main, universe]
 `

 type testArchive struct {
     options archive.Options
-    pkgs    map[string][]byte
+    pkgs    map[string]testPackage
 }

 func (a *testArchive) Options() *archive.Options {
@@ -536,7 +618,7 @@ func (a *testArchive) Fetch(pkg string) (io.ReadCloser, error) {
     if data, ok := a.pkgs[pkg]; ok {
-        return io.NopCloser(bytes.NewBuffer(data)), nil
+        return io.NopCloser(bytes.NewBuffer(data.content)), nil
     }
     return nil, fmt.Errorf("attempted to open %q package", pkg)
 }
@@ -569,16 +651,23 @@ func (s *S) TestRun(c *C) {
         selection, err := setup.Select(release, test.slices)
         c.Assert(err, IsNil)

-        pkgs := map[string][]byte{
-            "base-files": testutil.PackageData["base-files"],
+        pkgs := map[string]testPackage{
+            "base-files": testPackage{content: testutil.PackageData["base-files"]},
         }
         for name, entries := range packageEntries {
             deb, err := testutil.MakeDeb(entries)
             c.Assert(err, IsNil)
-            pkgs[name] = deb
+            pkgs[name] = testPackage{content: deb}
         }
         archives := map[string]archive.Archive{}
         for name, setupArchive := range release.Archives {
+            var archivePkgs map[string]testPackage
+            if test.pkgs != nil {
+                archivePkgs = test.pkgs[name]
+            }
+            if archivePkgs == nil {
+                archivePkgs = pkgs
+            }
             archive := &testArchive{
                 options: archive.Options{
                     Label:      setupArchive.Name,
@@ -587,7 +676,7 @@ func (s *S) TestRun(c *C) {
                     Components: setupArchive.Components,
                     Arch:       test.arch,
                 },
-                pkgs: pkgs,
+                pkgs: archivePkgs,
             }
             archives[name] = archive
         }
@@ -611,8 +700,15 @@ func (s *S) TestRun(c *C) {

         if test.result != nil {
             result := make(map[string]string, len(copyrightEntries)+len(test.result))
-            for k, v := range copyrightEntries {
-                result[k] = v
+            if test.pkgs == nil {
+                // This was added in order to not specify copyright entries for each
+                // existing test. These tests use only the base-files embedded
+                // package. Custom packages may not include copyright entries
+                // though. So if a test defines any custom packages, it must include
+                // copyright entries explicitly in the results.
+                for k, v := range copyrightEntries {
+                    result[k] = v
+                }
             }
             for k, v := range test.result {
                 result[k] = v

From 19c9af6869a302d4b07a4d3a5ee23530818733ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?=
Date: Fri, 16 Jun 2023 07:32:31 +0200
Subject: [PATCH 2/8] archive: Add Info() method

At some point we will need to get information about (deb) packages
from (Go) packages other than archive, namely from the slicer package.
For instance:

- Package entries in Chisel DB will include version, digest and
  architecture.
- Multi-archive package selection will need to know versions of a
  package in all configured archives.

Add an Info() method to the Archive interface that returns a new
PackageInfo interface that contains accessors for the underlying
control.Section of the package. Currently these accessors are the
Name(), Version(), Arch() and SHA256() methods.

The reason to introduce the PackageInfo interface instead of returning
control.Section directly is to keep the archive abstraction and not
leak implementation details.
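For illustration only, a minimal sketch of how a caller such as the
slicer package might consume the new method; the helper function below
is hypothetical and not part of this patch (it assumes the usual fmt
and archive imports):

    func describePackage(a archive.Archive, pkg string) (string, error) {
        info := a.Info(pkg)
        if info == nil {
            // Info() returns nil when the package is not in the archive.
            return "", fmt.Errorf("cannot find package %q in archive", pkg)
        }
        return fmt.Sprintf("%s %s %s (sha256 %s)",
            info.Name(), info.Version(), info.Arch(), info.SHA256()), nil
    }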
--- internal/archive/archive.go | 24 +++++++++++++++++++++++ internal/archive/archive_test.go | 33 ++++++++++++++++++++++++++++++++ internal/slicer/slicer_test.go | 29 ++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/internal/archive/archive.go b/internal/archive/archive.go index 63badc8f..56a3fcd9 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -13,9 +13,17 @@ import ( "github.com/canonical/chisel/internal/deb" ) +type PackageInfo interface { + Name() string + Version() string + Arch() string + SHA256() string +} + type Archive interface { Options() *Options Fetch(pkg string) (io.ReadCloser, error) + Info(pkg string) PackageInfo Exists(pkg string) bool } @@ -86,6 +94,22 @@ func (a *ubuntuArchive) Exists(pkg string) bool { return err == nil } +type pkgInfo struct{ control.Section } + +var _ PackageInfo = pkgInfo{} + +func (info pkgInfo) Name() string { return info.Get("Package") } +func (info pkgInfo) Version() string { return info.Get("Version") } +func (info pkgInfo) Arch() string { return info.Get("Architecture") } +func (info pkgInfo) SHA256() string { return info.Get("SHA256") } + +func (a *ubuntuArchive) Info(pkg string) PackageInfo { + if section, _, _ := a.selectPackage(pkg); section != nil { + return &pkgInfo{section} + } + return nil +} + func (a *ubuntuArchive) selectPackage(pkg string) (control.Section, *ubuntuIndex, error) { var selectedVersion string var selectedSection control.Section diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go index cb9f3278..b18fc60c 100644 --- a/internal/archive/archive_test.go +++ b/internal/archive/archive_test.go @@ -332,6 +332,39 @@ func (s *httpSuite) TestArchiveLabels(c *C) { c.Assert(err, ErrorMatches, `.*\bno Ubuntu section`) } +func (s *httpSuite) TestPackageInfo(c *C) { + s.prepareArchive("jammy", "22.04", "amd64", []string{"main", "universe"}) + + options := archive.Options{ + Label: "ubuntu", + Version: "22.04", + Arch: "amd64", + Suites: []string{"jammy"}, + Components: []string{"main", "universe"}, + CacheDir: c.MkDir(), + } + + archive, err := archive.Open(&options) + c.Assert(err, IsNil) + + info1 := archive.Info("mypkg1") + c.Assert(info1, NotNil) + c.Assert(info1.Name(), Equals, "mypkg1") + c.Assert(info1.Version(), Equals, "1.1") + c.Assert(info1.Arch(), Equals, "amd64") + c.Assert(info1.SHA256(), Equals, "1f08ef04cfe7a8087ee38a1ea35fa1810246648136c3c42d5a61ad6503d85e05") + + info3 := archive.Info("mypkg3") + c.Assert(info3, NotNil) + c.Assert(info3.Name(), Equals, "mypkg3") + c.Assert(info3.Version(), Equals, "1.3") + c.Assert(info3.Arch(), Equals, "amd64") + c.Assert(info3.SHA256(), Equals, "fe377bf13ba1a5cb287cb4e037e6e7321281c929405ae39a72358ef0f5d179aa") + + info99 := archive.Info("mypkg99") + c.Assert(info99, IsNil) +} + func read(r io.Reader) string { data, err := io.ReadAll(r) if err != nil { diff --git a/internal/slicer/slicer_test.go b/internal/slicer/slicer_test.go index 471d64a1..cdb68f12 100644 --- a/internal/slicer/slicer_test.go +++ b/internal/slicer/slicer_test.go @@ -23,6 +23,7 @@ var ( ) type testPackage struct { + info map[string]string content []byte } @@ -607,6 +608,22 @@ const defaultChiselYaml = ` components: [main, universe] ` +type testPackageInfo map[string]string + +var _ archive.PackageInfo = (testPackageInfo)(nil) + +func (info testPackageInfo) Name() string { return info["Package"] } +func (info testPackageInfo) Version() string { return info["Version"] } +func (info testPackageInfo) Arch() string { return 
info["Architecture"] } +func (info testPackageInfo) SHA256() string { return info["SHA256"] } + +func (s testPackageInfo) Get(key string) (value string) { + if s != nil { + value = s[key] + } + return +} + type testArchive struct { options archive.Options pkgs map[string]testPackage @@ -628,6 +645,18 @@ func (a *testArchive) Exists(pkg string) bool { return ok } +func (a *testArchive) Info(pkg string) archive.PackageInfo { + var info map[string]string + if pkgData, ok := a.pkgs[pkg]; ok { + if info = pkgData.info; info == nil { + info = map[string]string{ + "Version": "1.0", + } + } + } + return testPackageInfo(info) +} + func (s *S) TestRun(c *C) { for _, test := range slicerTests { c.Logf("Summary: %s", test.summary) From 7febfeb63e4f01af41393ef995e5d2be2902ecc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?= Date: Thu, 1 Jun 2023 23:44:33 +0200 Subject: [PATCH 3/8] fsutil: Add SlashedPathDir() and SlashedPathClean() functions From the code: These functions exists because we work with slash terminated directory paths that come from deb package tarballs but standard library path functions treat slash terminated paths as unclean. We use ad-hoc code to make filepath.Dir() slash terminated in two places right now. For example this code targetDir := filepath.Dir(strings.TrimRight(targetPath, "/")) + "/" is not strictly correct as "/a/b/.///" will be turned into "/a/b/./" which is still "/a/b". Since we deal with slash terminated directory paths everywhere, create and use dedicated helper functions to process them. --- internal/fsutil/path.go | 66 ++++++++++++++++++++++++++++++++++++ internal/fsutil/path_test.go | 54 +++++++++++++++++++++++++++++ internal/slicer/slicer.go | 18 ++++------ 3 files changed, 127 insertions(+), 11 deletions(-) create mode 100644 internal/fsutil/path.go create mode 100644 internal/fsutil/path_test.go diff --git a/internal/fsutil/path.go b/internal/fsutil/path.go new file mode 100644 index 00000000..34469b46 --- /dev/null +++ b/internal/fsutil/path.go @@ -0,0 +1,66 @@ +package fsutil + +import ( + "path/filepath" +) + +// isDirPath returns whether the path refers to a directory. +// The path refers to a directory when it ends with "/", "/." or "/..", or when +// it equals "." or "..". +func isDirPath(path string) bool { + i := len(path) - 1 + if i < 0 { + return true + } + if path[i] == '.' { + i-- + if i < 0 { + return true + } + if path[i] == '.' { + i-- + if i < 0 { + return true + } + } + } + if path[i] == '/' { + return true + } + return false +} + +// Debian package tarballs present paths slightly differently to what we would +// normally classify as clean paths. While a traditional clean file path is identical +// to a clean deb package file path, the deb package directory path always ends +// with a slash. Although the change only affects directory paths, the implication +// is that a directory path without a slash is interpreted as a file path. For this +// reason, we need to be very careful and handle both file and directory paths using +// a new set of functions. We call this new path type a Slashed Path. A slashed path +// allows us to identify a file or directory simply using lexical analysis. + +// SlashedPathClean takes a file or slashed directory path as input, and produces +// the shortest equivalent as output. An input path ending without a slash will be +// interpreted as a file path. Directory paths should always end with a slash. 
+// These functions exist because we work with slash terminated directory paths
+// that come from deb package tarballs but standard library path functions
+// treat slash terminated paths as unclean.
+func SlashedPathClean(path string) string {
+    clean := filepath.Clean(path)
+    if clean != "/" && isDirPath(path) {
+        clean += "/"
+    }
+    return clean
+}
+
+// SlashedPathDir takes a file or slashed directory path as input, cleans the
+// path and returns the parent directory. An input path ending without a slash
+// will be interpreted as a file path. Directory paths should always end with a slash.
+// It is like filepath.Dir() but the returned path ends with a slash.
+func SlashedPathDir(path string) string {
+    parent := filepath.Dir(filepath.Clean(path))
+    if parent != "/" {
+        parent += "/"
+    }
+    return parent
+}
diff --git a/internal/fsutil/path_test.go b/internal/fsutil/path_test.go
new file mode 100644
index 00000000..864ec594
--- /dev/null
+++ b/internal/fsutil/path_test.go
@@ -0,0 +1,54 @@
+package fsutil_test
+
+import (
+    . "gopkg.in/check.v1"
+
+    "github.com/canonical/chisel/internal/fsutil"
+)
+
+var cleanAndDirTestCases = []struct {
+    inputPath   string
+    resultClean string
+    resultDir   string
+}{
+    {"/a/b/c", "/a/b/c", "/a/b/"},
+    {"/a/b/c/", "/a/b/c/", "/a/b/"},
+    {"/a/b/c//", "/a/b/c/", "/a/b/"},
+    {"/a/b//c", "/a/b/c", "/a/b/"},
+    {"/a/b/c/.", "/a/b/c/", "/a/b/"},
+    {"/a/b/c/.///.", "/a/b/c/", "/a/b/"},
+    {"/a/b/./c/", "/a/b/c/", "/a/b/"},
+    {"/a/b/.///./c", "/a/b/c", "/a/b/"},
+    {"/a/b/c/..", "/a/b/", "/a/"},
+    {"/a/b/c/..///./", "/a/b/", "/a/"},
+    {"/a/b/c/../.", "/a/b/", "/a/"},
+    {"/a/b/../c/", "/a/c/", "/a/"},
+    {"/a/b/..///./c", "/a/c", "/a/"},
+    {"a/b/./c", "a/b/c", "a/b/"},
+    {"./a/b/./c", "a/b/c", "a/b/"},
+    {"/", "/", "/"},
+    {"///", "/", "/"},
+    {"///.///", "/", "/"},
+    {"/././.", "/", "/"},
+    {".", "./", "./"},
+    {".///", "./", "./"},
+    {"..", "../", "./"},
+    {"..///.", "../", "./"},
+    {"../../..", "../../../", "../../"},
+    {"..///.///../..", "../../../", "../../"},
+    {"", "./", "./"},
+}
+
+func (s *S) TestSlashedPathClean(c *C) {
+    for _, t := range cleanAndDirTestCases {
+        c.Logf("%s => %s", t.inputPath, t.resultClean)
+        c.Assert(fsutil.SlashedPathClean(t.inputPath), Equals, t.resultClean)
+    }
+}
+
+func (s *S) TestSlashedPathDir(c *C) {
+    for _, t := range cleanAndDirTestCases {
+        c.Logf("%s => %s", t.inputPath, t.resultDir)
+        c.Assert(fsutil.SlashedPathDir(t.inputPath), Equals, t.resultDir)
+    }
+}
diff --git a/internal/slicer/slicer.go b/internal/slicer/slicer.go
index 2b684b5d..9cd35aec 100644
--- a/internal/slicer/slicer.go
+++ b/internal/slicer/slicer.go
@@ -32,25 +32,21 @@ func Run(options *RunOptions) error {

     knownPaths["/"] = true

+    // addKnownPath adds path and all its parent directory paths into the
+    // knownPaths set.
     addKnownPath := func(path string) {
         if path[0] != '/' {
             panic("bug: tried to add relative path to known paths")
         }
-        cleanPath := filepath.Clean(path)
-        slashPath := cleanPath
-        if path[len(path)-1] == '/' && cleanPath != "/" {
-            slashPath += "/"
-        }
+        path = fsutil.SlashedPathClean(path)
         for {
-            if _, ok := knownPaths[slashPath]; ok {
+            if _, ok := knownPaths[path]; ok {
                 break
             }
-            knownPaths[slashPath] = true
-            cleanPath = filepath.Dir(cleanPath)
-            if cleanPath == "/" {
+            knownPaths[path] = true
+            if path = fsutil.SlashedPathDir(path); path == "/" {
                 break
             }
-            slashPath = cleanPath + "/"
         }
     }

@@ -113,7 +109,7 @@ func Run(options *RunOptions) error {
                 hasCopyright = true
             }
         } else {
-            targetDir := filepath.Dir(strings.TrimRight(targetPath, "/")) + "/"
+            targetDir := fsutil.SlashedPathDir(targetPath)
             if targetDir == "" || targetDir == "/" {
                 continue
             }

From 75696061250af4ba1cc245b7fb35c60b70f50812 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?=
Date: Tue, 13 Jun 2023 16:06:29 +0200
Subject: [PATCH 4/8] extract: Proper parent directory modes

The purpose of parent directory handling in deb/extract.go is to
create parent directories of requested paths with the same attributes
(only mode as of now) that appear in the package tarball. However, the
current implementation is not correct when glob paths are requested.

In what follows, parent directory refers to a directory path that is
not explicitly requested for extraction, but that is the parent of
other paths that are requested for extraction, and so it is assumed to
be implicitly requested for extraction.

Currently, whether a package path should be extracted is determined by
the shouldExtract() function that iterates over requested paths and
for each checks whether it matches the package path if it's a glob,
or, if it's a non-glob, whether it equals the package path or whether
some of its target paths have the package path as the parent.

There are two problems with this implementation:

1) It only checks whether a package path is the parent of any target
   path of a requested non-glob path. It does not, and probably even
   cannot, check whether it is the parent of a requested glob path.

2) It iterates over the map of requested paths for every package path,
   even though it could match requested non-glob paths by a direct map
   lookup. And in each iteration, it checks whether a requested path
   is a glob by searching for wildcards in it.

This commit addresses mainly the first problem, but it touches the
second one as well.

Track modes of directories as we encounter them in the tarball. Then,
when creating a target path, create its missing parent directories
with modes with which they were recorded in the tarball, or 0755 if
they were not recorded yet. In the latter case, the directory mode is
also tracked so that the directory can be recreated with the proper
mode if we find it in the tarball later. This algorithm works for both
glob and non-glob requested paths.

Since the matching that was previously done in the shouldExtract()
function is not used anymore, the function was removed.

As part of this change, the requested glob paths are recorded, before
the extraction begins, into a dedicated list that is scanned only when
the requested non-glob path lookup fails. We still match requested
non-glob and glob paths separately. Ideally, we would use some kind of
pattern matching on trees, something like a radix tree which also
supports wildcards, but this commit is humble.
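To make the new behavior concrete, here is a condensed version of one
of the tests added below ("Implicit parent directories with a glob and
some parents missing in the tarball"): a glob is requested from a
tarball that contains ./a/b/ with mode 0702 and ./a/b/c/d, but has no
entries for ./a/ or ./a/b/c/:

    options := deb.ExtractOptions{
        Extract: map[string][]deb.ExtractInfo{
            "/a/b/c/*": {{Path: "/a/b/c/*"}},
        },
    }
    // Resulting tree:
    //   /a/      dir 0755   (not in the tarball, created with the default mode)
    //   /a/b/    dir 0702   (mode recorded from the tarball)
    //   /a/b/c/  dir 0755   (not in the tarball, created with the default mode)
    //   /a/b/c/d file 0601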
One consequence of this change is that when an optional path that
doesn't exist in the tarball is requested, its parent directories are
not created (if they are not the parents of other requested paths).
But this new behavior is arguably more natural than the old behavior
where we created parent directories for non-existent paths, which
seems to have been just an artifact of the implementation. Therefore,
one test had to be adjusted for this behavior change.

Since we do not allow optional paths to be defined in slices, this
change is visible only to callers of the deb.Extract() function. In
chisel, these callers are the extract tests and the slicer. The latter
depended on the old behavior to create parent directories for
non-extracted content paths by including their direct parent
directories in the requested paths. The behavior of chisel is
preserved by changing slicer to include all, and not just direct,
parent directories of non-extracted content paths in the requested
paths.
---
 internal/deb/extract.go      | 157 ++++++++++++++++++++++------------
 internal/deb/extract_test.go | 141 ++++++++++++++++++++++++++++++-
 internal/slicer/slicer.go    |  13 ++-
 3 files changed, 241 insertions(+), 70 deletions(-)

diff --git a/internal/deb/extract.go b/internal/deb/extract.go
index 5de2f403..3b837bb5 100644
--- a/internal/deb/extract.go
+++ b/internal/deb/extract.go
@@ -6,6 +6,7 @@ import (
     "compress/gzip"
     "fmt"
     "io"
+    "io/fs"
     "os"
     "path/filepath"
     "sort"
@@ -109,34 +110,8 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
         syscall.Umask(oldUmask)
     }()

-    shouldExtract := func(pkgPath string) (globPath string, ok bool) {
-        if pkgPath == "" {
-            return "", false
-        }
-        pkgPathIsDir := pkgPath[len(pkgPath)-1] == '/'
-        for extractPath, extractInfos := range options.Extract {
-            if extractPath == "" {
-                continue
-            }
-            switch {
-            case strings.ContainsAny(extractPath, "*?"):
-                if strdist.GlobPath(extractPath, pkgPath) {
-                    return extractPath, true
-                }
-            case extractPath == pkgPath:
-                return "", true
-            case pkgPathIsDir:
-                for _, extractInfo := range extractInfos {
-                    if strings.HasPrefix(extractInfo.Path, pkgPath) {
-                        return "", true
-                    }
-                }
-            }
-        }
-        return "", false
-    }
-
     pendingPaths := make(map[string]bool)
+    var globs []string
     for extractPath, extractInfos := range options.Extract {
         for _, extractInfo := range extractInfos {
             if !extractInfo.Optional {
@@ -144,8 +119,26 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
                 break
             }
         }
+        if strings.ContainsAny(extractPath, "*?") {
+            globs = append(globs, extractPath)
+        }
     }

+    // dirInfo represents a directory that we
+    // a) encountered in the tarball,
+    // b) created, or
+    // c) both a) and b).
+ type dirInfo struct { + // mode of the directory with which we + // a) encountered it in the tarball, or + // b) created it + mode fs.FileMode + // whether we created this directory + created bool + } + // directories we encountered and/or created + dirInfos := make(map[string]dirInfo) + tarReader := tar.NewReader(dataReader) for { tarHeader, err := tarReader.Next() @@ -161,44 +154,83 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { continue } sourcePath = sourcePath[1:] - globPath, ok := shouldExtract(sourcePath) - if !ok { - continue + sourceMode := tarHeader.FileInfo().Mode() + + globPath := "" + extractInfos := options.Extract[sourcePath] + + if extractInfos == nil { + for _, glob := range globs { + if strdist.GlobPath(glob, sourcePath) { + globPath = glob + extractInfos = []ExtractInfo{{Path: glob}} + break + } + } } - sourceIsDir := sourcePath[len(sourcePath)-1] == '/' + // Is this a directory path that was not requested? + if extractInfos == nil && sourceMode.IsDir() { + if info := dirInfos[sourcePath]; info.mode != sourceMode { + // We have not seen this directory yet, or we + // have seen or created it with a different mode + // before. Record the source path mode. + info.mode = sourceMode + dirInfos[sourcePath] = info + if info.created { + // We have created this directory before + // with a different mode. Create it + // again with the proper mode. + extractInfos = []ExtractInfo{{Path: sourcePath}} + } + } + } - //debugf("Extracting header: %#v", tarHeader) + if extractInfos == nil { + continue + } - var extractInfos []ExtractInfo if globPath != "" { - extractInfos = options.Extract[globPath] - delete(pendingPaths, globPath) if options.Globbed != nil { options.Globbed[globPath] = append(options.Globbed[globPath], sourcePath) } + delete(pendingPaths, globPath) } else { - extractInfos, ok = options.Extract[sourcePath] - if ok { - delete(pendingPaths, sourcePath) - } else { - // Base directory for extracted content. Relevant mainly to preserve - // the metadata, since the extracted content itself will also create - // any missing directories unaccounted for in the options. - err := fsutil.Create(&fsutil.CreateOptions{ - Path: filepath.Join(options.TargetDir, sourcePath), - Mode: tarHeader.FileInfo().Mode(), - MakeParents: true, - }) - if err != nil { - return err - } - continue + delete(pendingPaths, sourcePath) + } + + // createParents creates missing parent directories of the path + // with modes with which they were encountered in the tarball or + // 0755 if they were not encountered yet. + var createParents func(path string) error + createParents = func(path string) error { + dir := fsutil.SlashedPathDir(path) + if dir == "/" { + return nil } + info := dirInfos[dir] + if info.created { + return nil + } else if info.mode == 0 { + info.mode = fs.ModeDir | 0755 + } + if err := createParents(dir); err != nil { + return err + } + create := fsutil.CreateOptions{ + Path: filepath.Join(options.TargetDir, dir), + Mode: info.mode, + } + if err := fsutil.Create(&create); err != nil { + return err + } + info.created = true + dirInfos[dir] = info + return nil } var contentCache []byte - var contentIsCached = len(extractInfos) > 1 && !sourceIsDir && globPath == "" + var contentIsCached = len(extractInfos) > 1 && sourceMode.IsRegular() && globPath == "" if contentIsCached { // Read and cache the content so it may be reused. 
// As an alternative, to avoid having an entire file in @@ -219,23 +251,30 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { } var targetPath string if globPath == "" { - targetPath = filepath.Join(options.TargetDir, extractInfo.Path) + targetPath = extractInfo.Path } else { - targetPath = filepath.Join(options.TargetDir, sourcePath) + targetPath = sourcePath + } + if err := createParents(targetPath); err != nil { + return err } if extractInfo.Mode != 0 { tarHeader.Mode = int64(extractInfo.Mode) } + fsMode := tarHeader.FileInfo().Mode() err := fsutil.Create(&fsutil.CreateOptions{ - Path: targetPath, - Mode: tarHeader.FileInfo().Mode(), - Data: pathReader, - Link: tarHeader.Linkname, - MakeParents: true, + Path: filepath.Join(options.TargetDir, targetPath), + Mode: fsMode, + Data: pathReader, + Link: tarHeader.Linkname, }) if err != nil { return err } + if fsMode.IsDir() { + // Record the target directory mode. + dirInfos[targetPath] = dirInfo{fsMode, true} + } if globPath != "" { break } diff --git a/internal/deb/extract_test.go b/internal/deb/extract_test.go index 3147dc36..7de591ab 100644 --- a/internal/deb/extract_test.go +++ b/internal/deb/extract_test.go @@ -9,6 +9,12 @@ import ( "github.com/canonical/chisel/internal/testutil" ) +var ( + Reg = testutil.Reg + Dir = testutil.Dir + Lnk = testutil.Lnk +) + type extractTest struct { summary string pkgdata []byte @@ -260,10 +266,7 @@ var extractTests = []extractTest{{ }, }, result: map[string]string{ - "/etc/": "dir 0755", - "/usr/": "dir 0755", - "/usr/bin/": "dir 0755", - "/tmp/": "dir 01777", + "/etc/": "dir 0755", }, }, { summary: "Optional entries mixed in cannot be missing", @@ -280,6 +283,136 @@ var extractTests = []extractTest{{ }, }, error: `cannot extract from package "base-files": no content at /usr/bin/hallo`, +}, { + summary: "Implicit parent directories", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/b/"), + Reg(0601, "./a/b/c", ""), + }), + options: deb.ExtractOptions{ + Extract: map[string][]deb.ExtractInfo{ + "/a/b/c": []deb.ExtractInfo{{Path: "/a/b/c"}}, + }, + }, + result: map[string]string{ + "/a/": "dir 0701", + "/a/b/": "dir 0702", + "/a/b/c": "file 0601 empty", + }, +}, { + summary: "Implicit parent directories with different target path", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./b/"), + Reg(0601, "./b/x", "shark"), + Dir(0703, "./c/"), + Reg(0602, "./c/y", "octopus"), + Dir(0704, "./d/"), + }), + options: deb.ExtractOptions{ + Extract: map[string][]deb.ExtractInfo{ + "/b/x": []deb.ExtractInfo{{Path: "/a/x"}}, + "/c/y": []deb.ExtractInfo{{Path: "/d/y"}}, + }, + }, + result: map[string]string{ + "/a/": "dir 0701", + "/a/x": "file 0601 31fc1594", + "/d/": "dir 0704", + "/d/y": "file 0602 5633c9b8", + }, +}, { + summary: "Implicit parent directories with a glob", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/aa/"), + Dir(0703, "./a/aa/aaa/"), + Reg(0601, "./a/aa/aaa/ffff", ""), + }), + options: deb.ExtractOptions{ + Extract: map[string][]deb.ExtractInfo{ + "/a/aa/a**": []deb.ExtractInfo{{ + Path: "/a/aa/a**", + }}, + }, + }, + result: map[string]string{ + "/a/": "dir 0701", + "/a/aa/": "dir 0702", + "/a/aa/aaa/": "dir 0703", + "/a/aa/aaa/ffff": "file 0601 empty", + }, +}, { + summary: "Implicit parent directories with a glob and non-sorted tarball", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a/b/c/d", ""), + Dir(0702, "./a/b/"), + 
Dir(0703, "./a/b/c/"), + Dir(0701, "./a/"), + }), + options: deb.ExtractOptions{ + Extract: map[string][]deb.ExtractInfo{ + "/a/b/c/*": []deb.ExtractInfo{{ + Path: "/a/b/c/*", + }}, + }, + }, + result: map[string]string{ + "/a/": "dir 0701", + "/a/b/": "dir 0702", + "/a/b/c/": "dir 0703", + "/a/b/c/d": "file 0601 empty", + }, +}, { + summary: "Implicit parent directories with a glob and some parents missing in the tarball", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a/b/c/d", ""), + Dir(0702, "./a/b/"), + }), + options: deb.ExtractOptions{ + Extract: map[string][]deb.ExtractInfo{ + "/a/b/c/*": []deb.ExtractInfo{{ + Path: "/a/b/c/*", + }}, + }, + }, + result: map[string]string{ + "/a/": "dir 0755", + "/a/b/": "dir 0702", + "/a/b/c/": "dir 0755", + "/a/b/c/d": "file 0601 empty", + }, +}, { + summary: "Implicit parent directories with copied dirs and different modes", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/b/"), + Dir(0703, "./a/b/c/"), + Reg(0601, "./a/b/c/d", ""), + Dir(0704, "./e/"), + Dir(0705, "./e/f/"), + }), + options: deb.ExtractOptions{ + Extract: map[string][]deb.ExtractInfo{ + "/a/b/**": []deb.ExtractInfo{{ + Path: "/a/b/**", + }}, + "/e/f/": []deb.ExtractInfo{{ + Path: "/a/", + }}, + "/e/": []deb.ExtractInfo{{ + Path: "/a/b/c/", + Mode: 0706, + }}, + }, + }, + result: map[string]string{ + "/a/": "dir 0705", + "/a/b/": "dir 0702", + "/a/b/c/": "dir 0706", + "/a/b/c/d": "file 0601 empty", + }, }} func (s *S) TestExtract(c *C) { diff --git a/internal/slicer/slicer.go b/internal/slicer/slicer.go index 9cd35aec..3152ca67 100644 --- a/internal/slicer/slicer.go +++ b/internal/slicer/slicer.go @@ -109,14 +109,13 @@ func Run(options *RunOptions) error { hasCopyright = true } } else { - targetDir := fsutil.SlashedPathDir(targetPath) - if targetDir == "" || targetDir == "/" { - continue + parent := fsutil.SlashedPathDir(targetPath) + for ; parent != "/"; parent = fsutil.SlashedPathDir(parent) { + extractPackage[parent] = append(extractPackage[parent], deb.ExtractInfo{ + Path: parent, + Optional: true, + }) } - extractPackage[targetDir] = append(extractPackage[targetDir], deb.ExtractInfo{ - Path: targetDir, - Optional: true, - }) } } if !hasCopyright { From 1424faa79dd976fbfec9d9c72f521ec220554423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?= Date: Thu, 15 Jun 2023 11:17:53 +0200 Subject: [PATCH 5/8] extract: Proper modes with multiple copies We use the source tar entry header to compute fs.FileMode. When the target path has a different mode, we modify the source header first. Consequently, when a source path is mapped to multiple target paths, and when one target path overrides the mode and the following one doesn't, the following one will have the first one's mode. Fix it by remembering the source header mode. 
--- internal/deb/extract.go | 2 ++ internal/deb/extract_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/internal/deb/extract.go b/internal/deb/extract.go index 3b837bb5..98dba0b3 100644 --- a/internal/deb/extract.go +++ b/internal/deb/extract.go @@ -245,6 +245,7 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { } var pathReader io.Reader = tarReader + origMode := tarHeader.Mode for _, extractInfo := range extractInfos { if contentIsCached { pathReader = bytes.NewReader(contentCache) @@ -258,6 +259,7 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { if err := createParents(targetPath); err != nil { return err } + tarHeader.Mode = origMode if extractInfo.Mode != 0 { tarHeader.Mode = int64(extractInfo.Mode) } diff --git a/internal/deb/extract_test.go b/internal/deb/extract_test.go index 7de591ab..e86b4553 100644 --- a/internal/deb/extract_test.go +++ b/internal/deb/extract_test.go @@ -413,6 +413,32 @@ var extractTests = []extractTest{{ "/a/b/c/": "dir 0706", "/a/b/c/d": "file 0601 empty", }, +}, { + summary: "Copies with different permissions", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Reg(0601, "./b", ""), + }), + options: deb.ExtractOptions{ + Extract: map[string][]deb.ExtractInfo{ + "/a/": []deb.ExtractInfo{ + {Path: "/b/"}, + {Path: "/c/", Mode: 0702}, + {Path: "/d/", Mode: 01777}, + {Path: "/e/"}, + {Path: "/f/", Mode: 0723}, + {Path: "/g/"}, + }, + }, + }, + result: map[string]string{ + "/b/": "dir 0701", + "/c/": "dir 0702", + "/d/": "dir 01777", + "/e/": "dir 0701", + "/f/": "dir 0723", + "/g/": "dir 0701", + }, }} func (s *S) TestExtract(c *C) { From ec34abb1060de76debe7cfebc03244f02c2c3e3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?= Date: Thu, 15 Jun 2023 01:57:58 +0200 Subject: [PATCH 6/8] extract: Add callbacks This commit adds support for callbacks from deb/extract.go. These callbacks will be used by the Chisel DB implementation that will follow to record metadata about installed paths. The reason for adding callbacks instead of handling path recording in deb/extract is to keep the concerns separated between internal packages. In Chisel DB we need to record paths to slices relationship and digests of regular files. But deb/extract doesn't know about slices. slicer on the other hand knows about slices, and so it's the proper place to drive Chisel DB creation. But slicer needs to cooperate with deb/extract because it doesn't know which paths will be matched by globs and because it needs the content of matched regular files. The callbacks provide a mechanism for slicer to accomplish this. --- internal/deb/extract.go | 101 +++++++++++++--- internal/deb/extract_test.go | 228 +++++++++++++++++++++++++++++++++++ 2 files changed, 312 insertions(+), 17 deletions(-) diff --git a/internal/deb/extract.go b/internal/deb/extract.go index 98dba0b3..27f644f5 100644 --- a/internal/deb/extract.go +++ b/internal/deb/extract.go @@ -21,11 +21,56 @@ import ( "github.com/canonical/chisel/internal/strdist" ) +type ConsumeData func(reader io.Reader) error + +// DataCallback function is called when a tarball entry of a regular file is +// going to be extracted. +// +// The source and size parameters are set to the file's path in the tarball and +// the size of its content, respectively. When the returned ConsumeData function +// is not nil, it is called with the file's content. 
+// +// When the source path occurs in the tarball more than once, this function will +// be called for each occurrence. +// +// If either DataCallback or ConsumeData function returns a non-nil error, the +// Extract() function will fail with that error. +type DataCallback func(source string, size int64) (ConsumeData, error) + +// The CreateCallback function is called when a tarball entry is going to be +// extracted to a target path. +// +// The target, link, and mode parameters are set to the target's path, symbolic +// link target, and mode, respectively. The target's filesystem type can be +// determined from these parameters. If the link is not empty, the target is a +// symbolic link. Otherwise, if the target's path ends with /, it is a +// directory. Otherwise, it is a regular file. +// +// When the source parameter is not empty, the target is going to be extracted +// from a tarball entry with the source path. The function may be called more +// than once with the same source when the tarball entry is extracted to +// multiple different targets. +// +// Otherwise, the mode is 0755 and the target is going to be an implicitly +// created parent directory of another target, and when a directory entry with +// that path is later encountered in the tarball with a different mode, the +// function will be called again with the same target, source equal to the +// target, and the different mode. +// +// When the source path occurs in the tarball more than once, this function will +// be called for each target path for each occurrence. +// +// If CreateCallback function returns a non-nil error, the Extract() function +// will fail with that error. +type CreateCallback func(source, target, link string, mode fs.FileMode) error + type ExtractOptions struct { Package string TargetDir string Extract map[string][]ExtractInfo Globbed map[string][]string + OnData DataCallback + OnCreate CreateCallback } type ExtractInfo struct { @@ -209,14 +254,21 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { return nil } info := dirInfos[dir] + source := dir if info.created { return nil } else if info.mode == 0 { info.mode = fs.ModeDir | 0755 + source = "" } if err := createParents(dir); err != nil { return err } + if options.OnCreate != nil { + if err := options.OnCreate(source, dir, "", info.mode); err != nil { + return err + } + } create := fsutil.CreateOptions{ Path: filepath.Join(options.TargetDir, dir), Mode: info.mode, @@ -229,27 +281,37 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { return nil } - var contentCache []byte - var contentIsCached = len(extractInfos) > 1 && sourceMode.IsRegular() && globPath == "" - if contentIsCached { - // Read and cache the content so it may be reused. - // As an alternative, to avoid having an entire file in - // memory at once this logic might open the first file - // written and copy it every time. For now, the choice - // is speed over memory efficiency. - data, err := io.ReadAll(tarReader) - if err != nil { - return err + getReader := func() io.Reader { return tarReader } + + if sourceMode.IsRegular() { + var consumeData ConsumeData + if options.OnData != nil { + var err error + if consumeData, err = options.OnData(sourcePath, tarHeader.Size); err != nil { + return err + } + } + if consumeData != nil || (len(extractInfos) > 1 && globPath == "") { + // Read and cache the content so it may be reused. 
+ // As an alternative, to avoid having an entire file in + // memory at once this logic might open the first file + // written and copy it every time. For now, the choice + // is speed over memory efficiency. + data, err := io.ReadAll(tarReader) + if err != nil { + return err + } + getReader = func() io.Reader { return bytes.NewReader(data) } + } + if consumeData != nil { + if err := consumeData(getReader()); err != nil { + return err + } } - contentCache = data } - var pathReader io.Reader = tarReader origMode := tarHeader.Mode for _, extractInfo := range extractInfos { - if contentIsCached { - pathReader = bytes.NewReader(contentCache) - } var targetPath string if globPath == "" { targetPath = extractInfo.Path @@ -264,10 +326,15 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { tarHeader.Mode = int64(extractInfo.Mode) } fsMode := tarHeader.FileInfo().Mode() + if options.OnCreate != nil { + if err := options.OnCreate(sourcePath, targetPath, tarHeader.Linkname, fsMode); err != nil { + return err + } + } err := fsutil.Create(&fsutil.CreateOptions{ Path: filepath.Join(options.TargetDir, targetPath), Mode: fsMode, - Data: pathReader, + Data: getReader(), Link: tarHeader.Linkname, }) if err != nil { diff --git a/internal/deb/extract_test.go b/internal/deb/extract_test.go index e86b4553..f18ad24f 100644 --- a/internal/deb/extract_test.go +++ b/internal/deb/extract_test.go @@ -2,6 +2,8 @@ package deb_test import ( "bytes" + "io" + "io/fs" . "gopkg.in/check.v1" @@ -470,3 +472,229 @@ func (s *S) TestExtract(c *C) { c.Assert(result, DeepEquals, test.result) } } + +type callbackTest struct { + summary string + pkgdata []byte + extract map[string][]deb.ExtractInfo + callbacks [][]any + noConsume bool + noData bool +} + +var callbackTests = []callbackTest{{ + summary: "Trivial", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/b/"), + Reg(0601, "./a/b/c", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"data", "/a/b/c", 0, []byte{}}, + {"create", "/a/b/c", "/a/b/c", "", 0601}, + }, +}, { + summary: "Data", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Reg(0601, "./a/b", "foo"), + Reg(0602, "./a/c", "bar"), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"data", "/a/b", 3, []byte("foo")}, + {"create", "/a/b", "/a/b", "", 0601}, + {"data", "/a/c", 3, []byte("bar")}, + {"create", "/a/c", "/a/c", "", 0602}, + }, +}, { + summary: "Symlinks", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a", ""), + Lnk(0777, "./b", "/a"), + Lnk(0777, "./c", "/d"), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "/a", "/a", "", 0601}, + {"create", "/b", "/b", "/a", 0777}, + {"create", "/c", "/c", "/d", 0777}, + }, +}, { + summary: "Simple copied paths", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a", ""), + Reg(0602, "./b", ""), + Dir(0701, "./c/"), + Reg(0603, "./c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a": []deb.ExtractInfo{{Path: "/a"}}, + "/c/d": []deb.ExtractInfo{{Path: "/b"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "/a", "/a", "", 0601}, + {"create", "/c/d", "/b", "", 0603}, + }, 
+}, { + summary: "Parent directories", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/b/"), + Dir(0703, "./a/b/c/"), + Reg(0601, "./a/b/c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/c/": []deb.ExtractInfo{{Path: "/a/b/c/"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + }, +}, { + summary: "Parent directories with globs", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/b/"), + Dir(0703, "./a/b/c/"), + Reg(0601, "./a/b/c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/*/": []deb.ExtractInfo{{Path: "/a/b/*/"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + }, +}, { + summary: "Parent directories out of order", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a/b/c/d", ""), + Dir(0703, "./a/b/c/"), + Dir(0702, "./a/b/"), + Dir(0701, "./a/"), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/*/": []deb.ExtractInfo{{Path: "/a/b/*/"}}, + }, + callbacks: [][]any{ + {"create", "", "/a/", "", 0755}, + {"create", "", "/a/b/", "", 0755}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/", "/a/", "", 0701}, + }, +}, { + summary: "Parent directories with early copy path", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Reg(0601, "./a/b", ""), + Dir(0702, "./c/"), + Reg(0602, "./c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b": []deb.ExtractInfo{{Path: "/c/d"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "", "/c/", "", 0755}, + {"create", "/a/b", "/c/d", "", 0601}, + {"create", "/c/", "/c/", "", 0702}, + }, +}, { + summary: "Same file twice with different content", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a", "foo"), + Reg(0602, "./b", "bar"), + Reg(0603, "./a", "baz"), + }), + extract: map[string][]deb.ExtractInfo{ + "/*": []deb.ExtractInfo{{Path: "/*"}}, + }, + callbacks: [][]any{ + {"data", "/a", 3, []byte("foo")}, + {"create", "/a", "/a", "", 0601}, + {"data", "/b", 3, []byte("bar")}, + {"create", "/b", "/b", "", 0602}, + {"data", "/a", 3, []byte("baz")}, + {"create", "/a", "/a", "", 0603}, + }, +}, { + summary: "Source with multiple targets", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a", "aaa"), + Reg(0602, "./b", "bu bu bu"), + }), + extract: map[string][]deb.ExtractInfo{ + "/a": []deb.ExtractInfo{{Path: "/b"}}, + "/b": []deb.ExtractInfo{ + {Path: "/c", Mode: 0603}, + {Path: "/d"}, + }, + }, + callbacks: [][]any{ + {"data", "/a", 3, []byte("aaa")}, + {"create", "/a", "/b", "", 0601}, + {"data", "/b", 8, []byte("bu bu bu")}, + {"create", "/b", "/c", "", 0603}, + {"create", "/b", "/d", "", 0602}, + }, +}} + +func (s *S) TestExtractCallbacks(c *C) { + for _, test := range callbackTests { + c.Logf("Test: %s", test.summary) + dir := c.MkDir() + var callbacks [][]any + onData := func(source string, size int64) (deb.ConsumeData, error) { + if test.noConsume { + args := []any{"data", source, int(size), nil} + callbacks = append(callbacks, args) + return nil, nil + } + consume := func(reader io.Reader) error { + data, err := io.ReadAll(reader) + if err != nil { + return err + } + args := []any{"data", source, int(size), data} + callbacks = append(callbacks, args) + return nil + } + 
return consume, nil
+        }
+        if test.noData {
+            onData = nil
+        }
+        onCreate := func(source, target, link string, mode fs.FileMode) error {
+            modeInt := int(07777 & mode)
+            args := []any{"create", source, target, link, modeInt}
+            callbacks = append(callbacks, args)
+            return nil
+        }
+        options := deb.ExtractOptions{
+            Package:   "test",
+            TargetDir: dir,
+            Extract:   test.extract,
+            OnData:    onData,
+            OnCreate:  onCreate,
+        }
+        err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options)
+        c.Assert(err, IsNil)
+        c.Assert(callbacks, DeepEquals, test.callbacks)
+    }
+}

From bd4a2ee5a4bd8ac25b37d228d0577ec0f2c5cf90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?=
Date: Thu, 15 Jun 2023 21:02:29 +0200
Subject: [PATCH 7/8] db: Add Chisel DB v0.1 skeleton

This commit adds the skeleton db package that will grow into the full
Chisel DB implementation with forthcoming commits. The package
contains only New(), Save(), and Load() functions for creating,
writing, and reading the database, respectively.

The API exposes underlying jsonwall interfaces for both reading and
writing. An attempt was made to build an abstraction on top of
jsonwall, but it was later abandoned because of Go type system
limitations and the difficulty of testing it.

The schema is set to 0.1 when writing. An error is returned when the
schema isn't 0.1 when reading. The schema and database location in the
output root directory are implementation details that are not exposed
by the API.
---
 internal/db/db.go          | 80 ++++++++++++++++++++++++++++++++++++
 internal/db/db_test.go     | 74 +++++++++++++++++++++++++++++++++++
 internal/db/export_test.go |  3 ++
 internal/db/suite_test.go  | 15 +++++++
 4 files changed, 172 insertions(+)
 create mode 100644 internal/db/db.go
 create mode 100644 internal/db/db_test.go
 create mode 100644 internal/db/export_test.go
 create mode 100644 internal/db/suite_test.go

diff --git a/internal/db/db.go b/internal/db/db.go
new file mode 100644
index 00000000..8e325631
--- /dev/null
+++ b/internal/db/db.go
@@ -0,0 +1,80 @@
+package db
+
+import (
+    "fmt"
+    "os"
+    "path/filepath"
+
+    "github.com/canonical/chisel/internal/jsonwall"
+    "github.com/klauspost/compress/zstd"
+)
+
+const schema = "0.1"
+
+// New creates a new Chisel DB writer with the proper schema.
+func New() *jsonwall.DBWriter {
+    options := jsonwall.DBWriterOptions{Schema: schema}
+    return jsonwall.NewDBWriter(&options)
+}
+
+func getDBPath(root string) string {
+    return filepath.Join(root, ".chisel.db")
+}
+
+// Save uses the provided writer dbw to write the Chisel DB into the standard
+// path under the provided root directory.
+func Save(dbw *jsonwall.DBWriter, root string) (err error) {
+    dbPath := getDBPath(root)
+    defer func() {
+        if err != nil {
+            err = fmt.Errorf("cannot save state to %q: %w", dbPath, err)
+        }
+    }()
+    f, err := os.OpenFile(dbPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+    if err != nil {
+        return
+    }
+    defer f.Close()
+    // chmod the file in case it already existed with different permissions
+    if err = f.Chmod(0644); err != nil {
+        return
+    }
+    zw, err := zstd.NewWriter(f)
+    if err != nil {
+        return
+    }
+    if _, err = dbw.WriteTo(zw); err != nil {
+        return
+    }
+    return zw.Close()
+}
+
+// Load reads a Chisel DB from the standard path under the provided root
+// directory. If the Chisel DB doesn't exist, the returned error satisfies
+// errors.Is(err, fs.ErrNotExist).
+func Load(root string) (db *jsonwall.DB, err error) {
+    dbPath := getDBPath(root)
+    defer func() {
+        if err != nil {
+            err = fmt.Errorf("cannot load state from %q: %w", dbPath, err)
+        }
+    }()
+    f, err := os.Open(dbPath)
+    if err != nil {
+        return
+    }
+    defer f.Close()
+    zr, err := zstd.NewReader(f)
+    if err != nil {
+        return
+    }
+    defer zr.Close()
+    db, err = jsonwall.ReadDB(zr)
+    if err != nil {
+        return nil, err
+    }
+    if s := db.Schema(); s != schema {
+        return nil, fmt.Errorf("invalid schema %#v", s)
+    }
+    return
+}
diff --git a/internal/db/db_test.go b/internal/db/db_test.go
new file mode 100644
index 00000000..32ec2f7d
--- /dev/null
+++ b/internal/db/db_test.go
@@ -0,0 +1,74 @@
+package db_test
+
+import (
+    "sort"
+
+    "github.com/canonical/chisel/internal/db"
+    . "gopkg.in/check.v1"
+)
+
+type testEntry struct {
+    S string          `json:"s,omitempty"`
+    I int64           `json:"i,omitempty"`
+    L []string        `json:"l,omitempty"`
+    M map[string]bool `json:"m,omitempty"`
+}
+
+var saveLoadTestCase = []testEntry{
+    {"", 0, nil, nil},
+    {"hello", -1, nil, nil},
+    {"", 0, nil, nil},
+    {"", 100, []string{"a", "b"}, nil},
+    {"", 0, nil, map[string]bool{"a": true, "b": false}},
+    {"abc", 123, []string{"foo", "bar"}, nil},
+}
+
+func (s *S) TestSaveLoadRoundTrip(c *C) {
+    // To compare expected and obtained entries we first wrap the original
+    // entries in wrappers with increasing K. When we read the wrappers back,
+    // they may be in a different order, because jsonwall sorts entries by
+    // their JSON serialization. So we sort them by K to compare them in the
+    // original order.
+
+    type wrapper struct {
+        // test values
+        testEntry
+        // sort key for comparison
+        K int `json:"key"`
+    }
+
+    // wrap the entries with increasing K
+    expected := make([]wrapper, len(saveLoadTestCase))
+    for i, entry := range saveLoadTestCase {
+        expected[i] = wrapper{entry, i}
+    }
+
+    workDir := c.MkDir()
+    dbw := db.New()
+    for _, entry := range expected {
+        err := dbw.Add(entry)
+        c.Assert(err, IsNil)
+    }
+    err := db.Save(dbw, workDir)
+    c.Assert(err, IsNil)
+
+    dbr, err := db.Load(workDir)
+    c.Assert(err, IsNil)
+    c.Assert(dbr.Schema(), Equals, db.Schema)
+
+    iter, err := dbr.Iterate(nil)
+    c.Assert(err, IsNil)
+
+    obtained := make([]wrapper, 0, len(expected))
+    for iter.Next() {
+        var wrapped wrapper
+        err := iter.Get(&wrapped)
+        c.Assert(err, IsNil)
+        obtained = append(obtained, wrapped)
+    }
+
+    // sort the entries by K to get the original order
+    sort.Slice(obtained, func(i, j int) bool {
+        return obtained[i].K < obtained[j].K
+    })
+    c.Assert(obtained, DeepEquals, expected)
+}
diff --git a/internal/db/export_test.go b/internal/db/export_test.go
new file mode 100644
index 00000000..f04ccf3b
--- /dev/null
+++ b/internal/db/export_test.go
@@ -0,0 +1,3 @@
+package db
+
+var Schema = schema
diff --git a/internal/db/suite_test.go b/internal/db/suite_test.go
new file mode 100644
index 00000000..4334c717
--- /dev/null
+++ b/internal/db/suite_test.go
@@ -0,0 +1,15 @@
+package db_test
+
+import (
+    "testing"
+
+    . "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) {
+    TestingT(t)
+}
+
+type S struct{}
+
+var _ = Suite(&S{})

From e29baa1dfcd9f5afb29884ceeaba416ccdfdbad7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?=
Date: Thu, 15 Jun 2023 21:23:22 +0200
Subject: [PATCH 8/8] db: Add types

This commit adds types used by Chisel DB along with JSON serialization
code.
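For instance (the example is copied verbatim from the tests added
below), a sliced package is stored as a single JSON object tagged with
its kind:

    data, err := json.Marshal(db.Package{"badcowboys", "7", "d0aba6d028cd4a3fd153eb5e0bfb35c33f4d5674b80a7a827917df40e1192424", "amd64"})
    // err == nil, and data reads:
    // {"kind":"package","name":"badcowboys","version":"7","sha256":"d0aba6d028cd4a3fd153eb5e0bfb35c33f4d5674b80a7a827917df40e1192424","arch":"amd64"}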
The public types don't contain any serialization-specific fields, such
as kind, and all their fields are assignable from data types used
during slicing. They have private counterparts that are used for JSON
serialization.
---
 internal/db/types.go      | 218 ++++++++++++++++++++++++++++++++++++++
 internal/db/types_test.go | 188 ++++++++++++++++++++++++++++++++
 2 files changed, 406 insertions(+)
 create mode 100644 internal/db/types.go
 create mode 100644 internal/db/types_test.go

diff --git a/internal/db/types.go b/internal/db/types.go
new file mode 100644
index 00000000..ce42bf3c
--- /dev/null
+++ b/internal/db/types.go
@@ -0,0 +1,218 @@
+package db
+
+import (
+    "crypto/sha256"
+    "encoding/hex"
+    "encoding/json"
+    "fmt"
+    "io/fs"
+    "strconv"
+)
+
+func decodeDigest(str string) (*[sha256.Size]byte, error) {
+    if str == "" {
+        return nil, nil
+    }
+    if len(str) != 2*sha256.Size {
+        return nil, fmt.Errorf("length %d != %d", len(str), 2*sha256.Size)
+    }
+    sl, err := hex.DecodeString(str)
+    if err != nil {
+        return nil, err
+    }
+    var digest [sha256.Size]byte
+    copy(digest[:], sl)
+    return &digest, nil
+}
+
+// The *alias types exist to avoid recursive marshaling, see
+// https://choly.ca/post/go-json-marshalling/.
+
+// Package represents a package that was sliced.
+type Package struct {
+    Name    string `json:"name"`
+    Version string `json:"version"`
+    // package digests in chisel are used only in hex encoding so there's
+    // currently no need to store them as byte arrays
+    SHA256 string `json:"sha256"`
+    Arch   string `json:"arch"`
+}
+
+type packageAlias Package
+
+type jsonPackage struct {
+    Kind string `json:"kind"`
+    packageAlias
+}
+
+func (t Package) MarshalJSON() ([]byte, error) {
+    j := jsonPackage{"package", packageAlias(t)}
+    return json.Marshal(j)
+}
+
+func (t *Package) UnmarshalJSON(data []byte) error {
+    j := jsonPackage{}
+    if err := json.Unmarshal(data, &j); err != nil {
+        return err
+    }
+    if j.Kind != "package" {
+        return fmt.Errorf(`invalid kind %q: must be "package"`, j.Kind)
+    }
+    *t = Package(j.packageAlias)
+    return nil
+}
+
+// Slice represents a slice of a package that was installed.
+type Slice struct {
+    Name string `json:"name"`
+}
+
+type sliceAlias Slice
+
+type jsonSlice struct {
+    Kind string `json:"kind"`
+    sliceAlias
+}
+
+func (t Slice) MarshalJSON() ([]byte, error) {
+    j := jsonSlice{"slice", sliceAlias(t)}
+    return json.Marshal(j)
+}
+
+func (t *Slice) UnmarshalJSON(data []byte) error {
+    j := jsonSlice{}
+    if err := json.Unmarshal(data, &j); err != nil {
+        return err
+    }
+    if j.Kind != "slice" {
+        return fmt.Errorf(`invalid kind %q: must be "slice"`, j.Kind)
+    }
+    *t = Slice(j.sliceAlias)
+    return nil
+}
+
+// Path represents a path that was sliced from a package.
+//
+// The filesystem object type can be determined from Path's fields:
+//
+// a) If the SHA256 attribute is non-nil, this Path refers to a regular file,
+//    its Path attribute must not end with /, and its Link attribute must be
+//    empty.
+// b) Otherwise, if the Link attribute is not empty, this Path refers to a
+//    symbolic link.
+// c) Otherwise, this Path refers to a directory, and its Path attribute must
+//    end with /.
+//
+// If the Path refers to a regular file and its FinalSHA256 attribute is
+// non-nil, the file's content was changed during installation: the SHA256
+// attribute holds the digest of the content as shipped in the package,
+// while the FinalSHA256 attribute holds the digest of the final content.
+// The two attributes must hold different values.
+type Path struct {
+	Path        string
+	Mode        fs.FileMode
+	Slices      []string
+	SHA256      *[sha256.Size]byte
+	FinalSHA256 *[sha256.Size]byte
+	Size        int64
+	Link        string
+}
+
+type jsonPath struct {
+	Kind        string   `json:"kind"`
+	Path        string   `json:"path"`
+	Mode        string   `json:"mode"`
+	Slices      []string `json:"slices"`
+	SHA256      string   `json:"sha256,omitempty"`
+	FinalSHA256 string   `json:"final_sha256,omitempty"`
+	Size        *int64   `json:"size,omitempty"`
+	Link        string   `json:"link,omitempty"`
+}
+
+func (t Path) MarshalJSON() ([]byte, error) {
+	j := jsonPath{
+		Kind:   "path",
+		Path:   t.Path,
+		Mode:   fmt.Sprintf("%#o", t.Mode),
+		Slices: t.Slices,
+		Link:   t.Link,
+	}
+	if t.Slices == nil {
+		j.Slices = []string{}
+	}
+	if t.SHA256 != nil {
+		j.SHA256 = fmt.Sprintf("%x", *t.SHA256)
+		if t.FinalSHA256 != nil {
+			j.FinalSHA256 = fmt.Sprintf("%x", *t.FinalSHA256)
+		}
+		j.Size = &t.Size
+	}
+	return json.Marshal(j)
+}
+
+func (t *Path) UnmarshalJSON(data []byte) error {
+	j := jsonPath{}
+	if err := json.Unmarshal(data, &j); err != nil {
+		return err
+	}
+	if j.Kind != "path" {
+		return fmt.Errorf(`invalid kind %q: must be "path"`, j.Kind)
+	}
+	mode, err := strconv.ParseUint(j.Mode, 8, 32)
+	if err != nil {
+		return fmt.Errorf("invalid mode %#v: %w", j.Mode, err)
+	}
+	digest, err := decodeDigest(j.SHA256)
+	if err != nil {
+		return fmt.Errorf("invalid sha256 %#v: %w", j.SHA256, err)
+	}
+	finalDigest, err := decodeDigest(j.FinalSHA256)
+	if err != nil {
+		return fmt.Errorf("invalid final_sha256 %#v: %w", j.FinalSHA256, err)
+	}
+	t.Path = j.Path
+	t.Mode = fs.FileMode(mode)
+	t.Slices = j.Slices
+	if t.Slices != nil && len(t.Slices) == 0 {
+		t.Slices = nil
+	}
+	t.SHA256 = digest
+	t.FinalSHA256 = finalDigest
+	t.Size = 0
+	if j.Size != nil {
+		t.Size = *j.Size
+	}
+	t.Link = j.Link
+	return nil
+}
+
+// Content represents ownership of a path by a slice. More than one slice
+// can own the same path.
+type Content struct {
+	Slice string `json:"slice"`
+	Path  string `json:"path"`
+}
+
+type contentAlias Content
+
+type jsonContent struct {
+	Kind string `json:"kind"`
+	contentAlias
+}
+
+func (t Content) MarshalJSON() ([]byte, error) {
+	j := jsonContent{"content", contentAlias(t)}
+	return json.Marshal(j)
+}
+
+func (t *Content) UnmarshalJSON(data []byte) error {
+	j := jsonContent{}
+	if err := json.Unmarshal(data, &j); err != nil {
+		return err
+	}
+	if j.Kind != "content" {
+		return fmt.Errorf(`invalid kind %q: must be "content"`, j.Kind)
+	}
+	*t = Content(j.contentAlias)
+	return nil
+}
diff --git a/internal/db/types_test.go b/internal/db/types_test.go
new file mode 100644
index 00000000..c5c2bd07
--- /dev/null
+++ b/internal/db/types_test.go
@@ -0,0 +1,188 @@
+package db_test
+
+import (
+	"encoding/json"
+	"reflect"
+
+	"github.com/canonical/chisel/internal/db"
+	. "gopkg.in/check.v1"
+)
+
+type roundTripTestCase struct {
+	value any
+	json  string
+}
+
+var roundTripTestCases = []roundTripTestCase{{
+	db.Package{"foo", "bar", "", ""},
+	`{"kind":"package","name":"foo","version":"bar","sha256":"","arch":""}`,
+}, {
+	db.Package{"coolutils", "1.2-beta", "bbbaa816dedc5c5d58e30e7ed600fe62ea0208ef2d6fac4da8b312d113401958", "all"},
+	`{"kind":"package","name":"coolutils","version":"1.2-beta","sha256":"bbbaa816dedc5c5d58e30e7ed600fe62ea0208ef2d6fac4da8b312d113401958","arch":"all"}`,
+}, {
+	db.Package{"badcowboys", "7", "d0aba6d028cd4a3fd153eb5e0bfb35c33f4d5674b80a7a827917df40e1192424", "amd64"},
+	`{"kind":"package","name":"badcowboys","version":"7","sha256":"d0aba6d028cd4a3fd153eb5e0bfb35c33f4d5674b80a7a827917df40e1192424","arch":"amd64"}`,
+}, {
+	db.Slice{"elitestrike_bins"},
+	`{"kind":"slice","name":"elitestrike_bins"}`,
+}, {
+	db.Slice{"invalid but unmarshals"},
+	`{"kind":"slice","name":"invalid but unmarshals"}`,
+}, {
+	db.Path{
+		Path:   "/bin/snake",
+		Mode:   0755,
+		Slices: []string{"snake_bins"},
+		SHA256: &[...]byte{
+			0xa0, 0x1b, 0xab, 0x26, 0xf0, 0x8b, 0xa8, 0x7b, 0x86,
+			0x73, 0x63, 0x68, 0xb0, 0x68, 0x4f, 0x08, 0x49, 0xa3,
+			0x65, 0xab, 0x4c, 0x5e, 0xc5, 0x46, 0xd9, 0x73, 0xca,
+			0x87, 0xc8, 0x15, 0xf6, 0x82,
+		},
+		Size: 13,
+	},
+	`{"kind":"path","path":"/bin/snake","mode":"0755","slices":["snake_bins"],"sha256":"a01bab26f08ba87b86736368b0684f0849a365ab4c5ec546d973ca87c815f682","size":13}`,
+}, {
+	db.Path{
+		Path:   "/etc/default/",
+		Mode:   0750,
+		Slices: []string{"someconfig_data", "mytoo_data"},
+	},
+	`{"kind":"path","path":"/etc/default/","mode":"0750","slices":["someconfig_data","mytoo_data"]}`,
+}, {
+	db.Path{
+		Path:   "/var/lib/matt/index.data",
+		Mode:   0600,
+		Slices: []string{"daemon_data"},
+		SHA256: &[...]byte{
+			0x06, 0x82, 0xc5, 0xf2, 0x07, 0x6f, 0x09, 0x9c, 0x34,
+			0xcf, 0xdd, 0x15, 0xa9, 0xe0, 0x63, 0x84, 0x9e, 0xd4,
+			0x37, 0xa4, 0x96, 0x77, 0xe6, 0xfc, 0xc5, 0xb4, 0x19,
+			0x8c, 0x76, 0x57, 0x5b, 0xe5,
+		},
+		FinalSHA256: &[...]byte{
+			0xd7, 0xd5, 0xdc, 0xc3, 0x69, 0x42, 0x6e, 0x2e, 0x5f,
+			0x8d, 0xcb, 0x89, 0xaf, 0x43, 0x08, 0xb0, 0xda, 0xed,
+			0x6e, 0x55, 0x91, 0x0d, 0x53, 0x39, 0x5c, 0xe3, 0x8b,
+			0xd6, 0xdd, 0x1a, 0x94, 0x56,
+		},
+		Size: 999,
+	},
+	`{"kind":"path","path":"/var/lib/matt/index.data","mode":"0600","slices":["daemon_data"],"sha256":"0682c5f2076f099c34cfdd15a9e063849ed437a49677e6fcc5b4198c76575be5","final_sha256":"d7d5dcc369426e2e5f8dcb89af4308b0daed6e55910d53395ce38bd6dd1a9456","size":999}`,
}, {
+	db.Path{
+		Path:   "/etc/config",
+		Mode:   0644,
+		SHA256: &[32]byte{},
+	},
+	`{"kind":"path","path":"/etc/config","mode":"0644","slices":[],"sha256":"0000000000000000000000000000000000000000000000000000000000000000","size":0}`,
+}, {
+	db.Path{
+		Path:   "/lib",
+		Mode:   0777,
+		Slices: []string{"libc6_libs", "zlib1g_libs"},
+		Link:   "/usr/lib/",
+	},
+	`{"kind":"path","path":"/lib","mode":"0777","slices":["libc6_libs","zlib1g_libs"],"link":"/usr/lib/"}`,
+}, {
+	db.Path{},
+	`{"kind":"path","path":"","mode":"0","slices":[]}`,
+}, {
+	db.Path{Mode: 077777},
+	`{"kind":"path","path":"","mode":"077777","slices":[]}`,
+}, {
+	db.Content{"foo_sl", "/a/b/c"},
+	`{"kind":"content","slice":"foo_sl","path":"/a/b/c"}`,
+}}
+
+func (s *S) TestMarshalUnmarshalRoundTrip(c *C) {
+	for i, test := range roundTripTestCases {
+		c.Logf("Test #%d", i)
+		data, err := json.Marshal(test.value)
+		c.Assert(err, IsNil)
+		c.Assert(string(data), Equals, test.json)
+		ptrOut := reflect.New(reflect.ValueOf(test.value).Type())
+		err = json.Unmarshal(data, ptrOut.Interface())
+		c.Assert(err, IsNil)
+		c.Assert(ptrOut.Elem().Interface(), DeepEquals, test.value)
+	}
+}
+
+type unmarshalTestCase struct {
+	json  string
+	value any
+	error string
+}
+
+var unmarshalTestCases = []unmarshalTestCase{{
+	json:  `{"kind":"package","name":"pkg","version":"1.1","sha256":"d0aba6d028cd4a3fd153eb5e0bfb35c33f4d5674b80a7a827917df40e1192424","arch":"all"}`,
+	value: db.Package{"pkg", "1.1", "d0aba6d028cd4a3fd153eb5e0bfb35c33f4d5674b80a7a827917df40e1192424", "all"},
+}, {
+	json:  `{"kind":"slice","name":"a"}`,
+	value: db.Slice{"a"},
+}, {
+	json: `{"kind":"path","path":"/x/y/z","mode":"0644","slices":["pkg1_data","pkg2_data"],"sha256":"f177b37f18f5bc6596878f074721d796c2333d95f26ce1e45c5a096c350a1c07","final_sha256":"61bd495076999a77f75288fcfcdd76073ec4aa114632a310b3b3263c498e12f7","size":123}`,
+	value: db.Path{
+		Path:   "/x/y/z",
+		Mode:   0644,
+		Slices: []string{"pkg1_data", "pkg2_data"},
+		SHA256: &[...]byte{
+			0xf1, 0x77, 0xb3, 0x7f, 0x18, 0xf5, 0xbc, 0x65, 0x96,
+			0x87, 0x8f, 0x07, 0x47, 0x21, 0xd7, 0x96, 0xc2, 0x33,
+			0x3d, 0x95, 0xf2, 0x6c, 0xe1, 0xe4, 0x5c, 0x5a, 0x09,
+			0x6c, 0x35, 0x0a, 0x1c, 0x07,
+		},
+		FinalSHA256: &[...]byte{
+			0x61, 0xbd, 0x49, 0x50, 0x76, 0x99, 0x9a, 0x77, 0xf7,
+			0x52, 0x88, 0xfc, 0xfc, 0xdd, 0x76, 0x07, 0x3e, 0xc4,
+			0xaa, 0x11, 0x46, 0x32, 0xa3, 0x10, 0xb3, 0xb3, 0x26,
+			0x3c, 0x49, 0x8e, 0x12, 0xf7,
+		},
+		Size: 123,
+	},
+}, {
+	json:  `{"kind":"path","path":"/x/y/z","mode":"0777","slices":[],"link":"/home"}`,
+	value: db.Path{Path: "/x/y/z", Mode: 0777, Link: "/home"},
+}, {
+	json:  `{"kind":"path","path":"/x/y/z","mode":"0","slices":null}`,
+	value: db.Path{Path: "/x/y/z"},
+}, {
+	json:  `{"kind":"path","path":"/x/y/z","mode":"0"}`,
+	value: db.Path{Path: "/x/y/z"},
+}, {
+	json:  `{"kind":"content","slice":"pkg_sl","path":"/a/b/c"}`,
+	value: db.Content{Slice: "pkg_sl", Path: "/a/b/c"},
+}, {
+	json:  `{"kind":"path","path":"","mode":"90909"}`,
+	value: db.Path{},
+	error: `invalid mode "90909": strconv.ParseUint: parsing "90909": invalid syntax`,
+}, {
+	json:  `{"kind":"path","path":"","mode":"077777777777"}`,
+	value: db.Path{},
+	error: `invalid mode "077777777777": strconv.ParseUint: parsing "077777777777": value out of range`,
+}, {
+	json:  `{"kind":"path","path":"/tmp/abc.txt","mode":"0644","sha256":"too short"}`,
+	value: db.Path{},
+	error: `invalid sha256 "too short": length 9 != 64`,
+}, {
+	json:  `{"kind":"path","path":"/tmp/abc.txt","mode":"0644","sha256":"gfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"}`,
+	value: db.Path{},
+	error: `invalid sha256 "gfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff": encoding/hex: invalid byte: U\+0067 'g'`,
+}, {
+	json:  `{"kind":"package","name":"foo_libs"}`,
+	value: db.Slice{},
+	error: `invalid kind "package": must be "slice"`,
+}}
+
+func (s *S) TestUnmarshal(c *C) {
+	for i, test := range unmarshalTestCases {
+		c.Logf("Test #%d", i)
+		ptrOut := reflect.New(reflect.ValueOf(test.value).Type())
+		err := json.Unmarshal([]byte(test.json), ptrOut.Interface())
+		if test.error != "" {
+			c.Assert(err, ErrorMatches, test.error)
+		} else {
+			c.Assert(err, IsNil)
+			c.Assert(ptrOut.Elem().Interface(), DeepEquals, test.value)
+		}
+	}
+}
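
As a note for reviewers, here is a minimal sketch of how these types are
meant to be used from code living inside the chisel module (internal/db is
not importable from outside it). The path, slice name, and file content
below are invented for illustration only:

package main

import (
	"crypto/sha256"
	"encoding/json"
	"fmt"

	"github.com/canonical/chisel/internal/db"
)

func main() {
	// A regular file: SHA256 is non-nil, so Path must not end with "/"
	// and Link must be empty.
	digest := sha256.Sum256([]byte("hello\n"))
	p := db.Path{
		Path:   "/usr/bin/hello",
		Mode:   0755,
		Slices: []string{"hello_bins"},
		SHA256: &digest,
		Size:   6,
	}
	data, err := json.Marshal(p)
	if err != nil {
		panic(err)
	}
	// Prints a "kind":"path" object with the mode in octal, the digest
	// hex-encoded, and the size included because SHA256 is set.
	fmt.Println(string(data))

	// Round trip back into the public type.
	var out db.Path
	if err := json.Unmarshal(data, &out); err != nil {
		panic(err)
	}

	// The kind discriminator is validated on unmarshaling: a "package"
	// object does not unmarshal into a Slice.
	var s db.Slice
	err = json.Unmarshal([]byte(`{"kind":"package","name":"x"}`), &s)
	fmt.Println(err) // invalid kind "package": must be "slice"
}

The *alias indirection is what lets each MarshalJSON attach the kind
discriminator and then delegate to the standard encoder without recursing
into itself.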