Skip to content

Commit

Permalink
Merge branch 'ROCKS-1471/symmetric-hardlinks-in-slicer' into rebase-h…
Browse files Browse the repository at this point in the history
…ardlink
  • Loading branch information
zhijie-yang committed Nov 8, 2024
2 parents 7eb8428 + d499ec7 commit 5836341
Show file tree
Hide file tree
Showing 14 changed files with 614 additions and 63 deletions.
6 changes: 3 additions & 3 deletions internal/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

type Archive interface {
Options() *Options
Fetch(pkg string) (io.ReadCloser, *PackageInfo, error)
Fetch(pkg string) (io.ReadSeekCloser, *PackageInfo, error)
Exists(pkg string) bool
Info(pkg string) (*PackageInfo, error)
}
Expand Down Expand Up @@ -120,7 +120,7 @@ func (a *ubuntuArchive) selectPackage(pkg string) (control.Section, *ubuntuIndex
return selectedSection, selectedIndex, nil
}

func (a *ubuntuArchive) Fetch(pkg string) (io.ReadCloser, *PackageInfo, error) {
func (a *ubuntuArchive) Fetch(pkg string) (io.ReadSeekCloser, *PackageInfo, error) {
section, index, err := a.selectPackage(pkg)
if err != nil {
return nil, nil, err
Expand Down Expand Up @@ -287,7 +287,7 @@ func (index *ubuntuIndex) checkComponents(components []string) error {
return nil
}

func (index *ubuntuIndex) fetch(suffix, digest string, flags fetchFlags) (io.ReadCloser, error) {
func (index *ubuntuIndex) fetch(suffix, digest string, flags fetchFlags) (io.ReadSeekCloser, error) {
reader, err := index.archive.cache.Open(digest)
if err == nil {
return reader, nil
Expand Down
2 changes: 1 addition & 1 deletion internal/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (c *Cache) Write(digest string, data []byte) error {
return err2
}

func (c *Cache) Open(digest string) (io.ReadCloser, error) {
func (c *Cache) Open(digest string) (io.ReadSeekCloser, error) {
if c.Dir == "" || digest == "" {
return nil, MissErr
}
Expand Down
137 changes: 122 additions & 15 deletions internal/deb/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ type ExtractInfo struct {
Context any
}

// PendingHardlink describes a hard link whose creation was postponed because
// its link target did not exist on disk when the link entry was processed.
// Entries are grouped in a map keyed by the sanitized path of the link target.
type PendingHardlink struct {
// TargetPath is the path of the hard link to create; it is joined with
// the extraction target directory before being handed to Create.
TargetPath string
// ExtractInfos holds the extract infos that matched the hard link entry;
// they are passed through to the Create callback for this path.
ExtractInfos []ExtractInfo
}

func getValidOptions(options *ExtractOptions) (*ExtractOptions, error) {
for extractPath, extractInfos := range options.Extract {
isGlob := strings.ContainsAny(extractPath, "*?")
Expand All @@ -62,7 +67,7 @@ func getValidOptions(options *ExtractOptions) (*ExtractOptions, error) {
return options, nil
}

func Extract(pkgReader io.Reader, options *ExtractOptions) (err error) {
func Extract(pkgReader io.ReadSeeker, options *ExtractOptions) (err error) {
defer func() {
if err != nil {
err = fmt.Errorf("cannot extract from package %q: %w", options.Package, err)
Expand All @@ -83,43 +88,51 @@ func Extract(pkgReader io.Reader, options *ExtractOptions) (err error) {
return err
}

return extractData(pkgReader, validOpts)
}

func getDataReader(pkgReader io.ReadSeeker) (io.ReadCloser, error) {
arReader := ar.NewReader(pkgReader)
var dataReader io.Reader
var dataReader io.ReadCloser
for dataReader == nil {
arHeader, err := arReader.Next()
if err == io.EOF {
return fmt.Errorf("no data payload")
return nil, fmt.Errorf("no data payload")
}
if err != nil {
return err
return nil, err
}
switch arHeader.Name {
case "data.tar.gz":
gzipReader, err := gzip.NewReader(arReader)
if err != nil {
return err
return nil, err
}
defer gzipReader.Close()
dataReader = gzipReader
case "data.tar.xz":
xzReader, err := xz.NewReader(arReader)
if err != nil {
return err
return nil, err
}
dataReader = xzReader
dataReader = io.NopCloser(xzReader)
case "data.tar.zst":
zstdReader, err := zstd.NewReader(arReader)
if err != nil {
return err
return nil, err
}
defer zstdReader.Close()
dataReader = zstdReader
dataReader = zstdReader.IOReadCloser()
}
}
return extractData(dataReader, validOpts)

return dataReader, nil
}

func extractData(dataReader io.Reader, options *ExtractOptions) error {
func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error {

dataReader, err := getDataReader(pkgReader)
if err != nil {
return err
}

oldUmask := syscall.Umask(0)
defer func() {
Expand All @@ -136,6 +149,8 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
}
}

pendingHardlinks := make(map[string][]PendingHardlink)

// When creating a file we will iterate through its parent directories and
// create them with the permissions defined in the tarball.
//
Expand Down Expand Up @@ -246,20 +261,52 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
}
}
// Create the entry itself.
link := tarHeader.Linkname
if tarHeader.Typeflag == tar.TypeLink {
// A hard link requires the real path of the target file.
link = filepath.Join(options.TargetDir, link)
}

createOptions := &fsutil.CreateOptions{
Path: filepath.Join(options.TargetDir, targetPath),
Mode: tarHeader.FileInfo().Mode(),
Data: pathReader,
Link: tarHeader.Linkname,
Link: link,
MakeParents: true,
}
err := options.Create(extractInfos, createOptions)
if err != nil {
return err
// Handle the hardlink where its counterpart is not extracted
if tarHeader.Typeflag == tar.TypeLink && strings.HasPrefix(err.Error(), "link target does not exist") {
basePath := sanitizePath(tarHeader.Linkname)
pendingHardlinks[basePath] = append(pendingHardlinks[basePath],
PendingHardlink{
TargetPath: targetPath,
ExtractInfos: extractInfos,
})
pendingPaths[basePath] = true
} else {
return err
}
}
}
}

// Second pass to create hard links
if len(pendingHardlinks) > 0 {
pkgReader.Seek(0, io.SeekStart)
dataReader, err = getDataReader(pkgReader)
if err != nil {
return err
}
tarReader := tar.NewReader(dataReader)
err = handlePendingHardlinks(options, pendingHardlinks, tarReader, &pendingPaths)
if err != nil {
return err
}

}

if len(pendingPaths) > 0 {
pendingList := make([]string, 0, len(pendingPaths))
for pendingPath := range pendingPaths {
Expand All @@ -273,6 +320,58 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
}
}

dataReader.Close()

return nil
}

// handlePendingHardlinks walks the tarball behind tarReader and materializes
// the hard links that were postponed because their link target had not been
// extracted.
//
// pendingHardlinks is keyed by the sanitized path of the link target. When the
// tar entry for such a target is found, its content is written to the path of
// the first pending hard link in the group, the target is removed from
// pendingPaths, and every remaining hard link in the group is then created as
// a hard link to that first path.
//
// It returns the first error reported by the tar reader or by options.Create,
// and nil once the end of the tarball is reached.
func handlePendingHardlinks(options *ExtractOptions, pendingHardlinks map[string][]PendingHardlink,
	tarReader *tar.Reader, pendingPaths *map[string]bool) error {
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		// Entries whose name does not sanitize to a usable path are skipped,
		// as are entries nobody is waiting for.
		entryPath := sanitizePath(header.Name)
		if entryPath == "" {
			continue
		}
		group, waiting := pendingHardlinks[entryPath]
		if !waiting {
			continue
		}

		mode := header.FileInfo().Mode()
		first := group[0]
		firstPath := filepath.Join(options.TargetDir, first.TargetPath)

		// The first member of the group receives the actual content, read
		// straight from the current tar entry.
		err = options.Create(first.ExtractInfos, &fsutil.CreateOptions{
			Path: firstPath,
			Mode: mode,
			Data: tarReader,
		})
		if err != nil {
			return err
		}
		delete(*pendingPaths, entryPath)

		// Every other member becomes a hard link to the first one.
		for i := 1; i < len(group); i++ {
			member := group[i]
			err := options.Create(member.ExtractInfos, &fsutil.CreateOptions{
				Path: filepath.Join(options.TargetDir, member.TargetPath),
				Mode: mode,
				Link: firstPath,
			})
			if err != nil {
				return err
			}
		}
	}
}

Expand All @@ -288,3 +387,11 @@ func parentDirs(path string) []string {
}
return parents
}

// sanitizePath converts a tarball entry name of the form "./some/path" into
// the absolute form "/some/path". It returns the empty string when the name
// does not start with "./" or has nothing after that prefix (e.g. "./").
func sanitizePath(path string) string {
	const prefix = "./"
	if len(path) <= len(prefix) || path[:len(prefix)] != prefix {
		return ""
	}
	// Drop only the leading dot so the result keeps its leading slash.
	return path[len(prefix)-1:]
}
60 changes: 58 additions & 2 deletions internal/deb/extract_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,62 @@ var extractTests = []extractTest{{
},
},
error: `cannot extract from package "test-package": path /dir/ requested twice with diverging mode: 0777 != 0000`,
}, {
summary: "Dangling hard link",
pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
testutil.Dir(0755, "./"),
testutil.Hln(0644, "./link", "./non-existing-target"),
}),
options: deb.ExtractOptions{
Extract: map[string][]deb.ExtractInfo{
"/**": []deb.ExtractInfo{{
Path: "/**",
}},
},
},
error: `cannot extract from package "test-package": no content at \/non-existing-target`,
}, {
summary: "Hard link to symlink does not follow symlink",
pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
testutil.Dir(0755, "./"),
testutil.Lnk(0644, "./symlink", "./file"),
testutil.Hln(0644, "./hardlink", "./symlink"),
}),
options: deb.ExtractOptions{
Extract: map[string][]deb.ExtractInfo{
"/**": []deb.ExtractInfo{{
Path: "/**",
}},
},
},
result: map[string]string{
"/hardlink": "symlink ./file",
"/symlink": "symlink ./file",
},
notCreated: []string{},
}, {
summary: "Extract all types of files",
pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
testutil.Dir(0755, "./"),
testutil.Dir(0755, "./dir/"),
testutil.Reg(0644, "./dir/file", "text for file"),
testutil.Lnk(0644, "./symlink", "./dir/file"),
testutil.Hln(0644, "./hardlink", "./dir/file"),
}),
options: deb.ExtractOptions{
Extract: map[string][]deb.ExtractInfo{
"/**": []deb.ExtractInfo{{
Path: "/**",
}},
},
},
result: map[string]string{
"/dir/": "dir 0755",
"/dir/file": "file 0644 28121945",
"/hardlink": "file 0644 28121945",
"/symlink": "symlink ./dir/file",
},
notCreated: []string{},
}}

func (s *S) TestExtract(c *C) {
Expand All @@ -377,7 +433,7 @@ func (s *S) TestExtract(c *C) {
test.hackopt(&options)
}

err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options)
err := deb.Extract(bytes.NewReader(test.pkgdata), &options)
if test.error != "" {
c.Assert(err, ErrorMatches, test.error)
continue
Expand Down Expand Up @@ -488,7 +544,7 @@ func (s *S) TestExtractCreateCallback(c *C) {
return nil
}

err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options)
err := deb.Extract(bytes.NewReader(test.pkgdata), &options)
c.Assert(err, IsNil)

c.Assert(createExtractInfos, DeepEquals, test.calls)
Expand Down
35 changes: 33 additions & 2 deletions internal/fsutil/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ type CreateOptions struct {
Path string
Mode fs.FileMode
Data io.Reader
// If Link is set and Mode has the symlink flag set, a symlink is
// created. If Link is set and Mode describes a regular file, a hard
// link is created instead.
Link string
// If MakeParents is true, missing parent directories of Path are
// created with permissions 0755.
Expand Down Expand Up @@ -48,8 +51,14 @@ func Create(options *CreateOptions) (*Entry, error) {

switch o.Mode & fs.ModeType {
case 0:
err = createFile(o)
hash = hex.EncodeToString(rp.h.Sum(nil))
if o.Link != "" {
// Creating the hard link does not involve reading the file.
// Therefore, its size and hash are not calculated here.
err = createHardLink(o)
} else {
err = createFile(o)
hash = hex.EncodeToString(rp.h.Sum(nil))
}
case fs.ModeDir:
err = createDir(o)
case fs.ModeSymlink:
Expand Down Expand Up @@ -150,6 +159,28 @@ func createSymlink(o *CreateOptions) error {
return os.Symlink(o.Link, o.Path)
}

// createHardLink creates o.Path as a hard link to o.Link.
//
// Both paths are inspected with os.Lstat. A missing o.Link yields a
// "link target does not exist" error. If o.Path already exists, the call
// succeeds without doing anything when it is already the same file as o.Link,
// and fails with an "already exists" error otherwise.
func createHardLink(o *CreateOptions) error {
	debugf("Creating hard link: %s => %s", o.Path, o.Link)

	targetInfo, err := os.Lstat(o.Link)
	switch {
	case err == nil:
		// The link target is present; carry on.
	case os.IsNotExist(err):
		return fmt.Errorf("link target does not exist: %s", o.Link)
	default:
		return err
	}

	existingInfo, err := os.Lstat(o.Path)
	switch {
	case err == nil, os.IsExist(err):
		// Something already sits at o.Path: fine only if it is the link
		// we were asked to create.
		if os.SameFile(targetInfo, existingInfo) {
			return nil
		}
		return fmt.Errorf("path %s already exists", o.Path)
	case !os.IsNotExist(err):
		return err
	}

	return os.Link(o.Link, o.Path)
}

// readerProxy implements the io.Reader interface proxying the calls to its
// inner io.Reader. On each read, the proxy keeps track of the file size and hash.
type readerProxy struct {
Expand Down
Loading

0 comments on commit 5836341

Please sign in to comment.