Skip to content

Commit

Permalink
feat: add support for symmetric hardlink handling
Browse files Browse the repository at this point in the history
  • Loading branch information
zhijie-yang committed Sep 26, 2024
1 parent f1de97a commit 2a56478
Show file tree
Hide file tree
Showing 8 changed files with 408 additions and 112 deletions.
6 changes: 3 additions & 3 deletions internal/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

type Archive interface {
Options() *Options
Fetch(pkg string) (io.ReadCloser, error)
Fetch(pkg string) (io.ReadSeekCloser, error)
Exists(pkg string) bool
}

Expand Down Expand Up @@ -112,7 +112,7 @@ func (a *ubuntuArchive) selectPackage(pkg string) (control.Section, *ubuntuIndex
return selectedSection, selectedIndex, nil
}

func (a *ubuntuArchive) Fetch(pkg string) (io.ReadCloser, error) {
func (a *ubuntuArchive) Fetch(pkg string) (io.ReadSeekCloser, error) {
section, index, err := a.selectPackage(pkg)
if err != nil {
return nil, err
Expand Down Expand Up @@ -269,7 +269,7 @@ func (index *ubuntuIndex) checkComponents(components []string) error {
return nil
}

func (index *ubuntuIndex) fetch(suffix, digest string, flags fetchFlags) (io.ReadCloser, error) {
func (index *ubuntuIndex) fetch(suffix, digest string, flags fetchFlags) (io.ReadSeekCloser, error) {
reader, err := index.archive.cache.Open(digest)
if err == nil {
return reader, nil
Expand Down
2 changes: 1 addition & 1 deletion internal/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (c *Cache) Write(digest string, data []byte) error {
return err2
}

func (c *Cache) Open(digest string) (io.ReadCloser, error) {
func (c *Cache) Open(digest string) (io.ReadSeekCloser, error) {
if c.Dir == "" || digest == "" {
return nil, MissErr
}
Expand Down
136 changes: 122 additions & 14 deletions internal/deb/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ type ExtractInfo struct {
Context any
}

// PendingHardlink describes a hard link entry that could not be created while
// reading the data tarball because its link target did not exist yet. Such
// entries are collected and resolved afterwards by handlePendingHardlinks.
type PendingHardlink struct {
// TargetPath is the path of the link to create; it is joined with
// ExtractOptions.TargetDir before being handed to the Create callback.
TargetPath string
// ExtractInfos holds the extract infos that are passed to the Create
// callback when the link is eventually created.
ExtractInfos []ExtractInfo
}

func getValidOptions(options *ExtractOptions) (*ExtractOptions, error) {
for extractPath, extractInfos := range options.Extract {
isGlob := strings.ContainsAny(extractPath, "*?")
Expand All @@ -62,7 +67,7 @@ func getValidOptions(options *ExtractOptions) (*ExtractOptions, error) {
return options, nil
}

func Extract(pkgReader io.Reader, options *ExtractOptions) (err error) {
func Extract(pkgReader io.ReadSeeker, options *ExtractOptions) (err error) {
defer func() {
if err != nil {
err = fmt.Errorf("cannot extract from package %q: %w", options.Package, err)
Expand All @@ -83,43 +88,51 @@ func Extract(pkgReader io.Reader, options *ExtractOptions) (err error) {
return err
}

return extractData(pkgReader, validOpts)
}

func getDataReader(pkgReader io.ReadSeeker) (io.ReadCloser, error) {
arReader := ar.NewReader(pkgReader)
var dataReader io.Reader
var dataReader io.ReadCloser
for dataReader == nil {
arHeader, err := arReader.Next()
if err == io.EOF {
return fmt.Errorf("no data payload")
return nil, fmt.Errorf("no data payload")
}
if err != nil {
return err
return nil, err
}
switch arHeader.Name {
case "data.tar.gz":
gzipReader, err := gzip.NewReader(arReader)
if err != nil {
return err
return nil, err
}
defer gzipReader.Close()
dataReader = gzipReader
case "data.tar.xz":
xzReader, err := xz.NewReader(arReader)
if err != nil {
return err
return nil, err
}
dataReader = xzReader
dataReader = io.NopCloser(xzReader)
case "data.tar.zst":
zstdReader, err := zstd.NewReader(arReader)
if err != nil {
return err
return nil, err
}
defer zstdReader.Close()
dataReader = zstdReader
dataReader = zstdReader.IOReadCloser()
}
}
return extractData(dataReader, validOpts)

return dataReader, nil
}

func extractData(dataReader io.Reader, options *ExtractOptions) error {
func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error {

dataReader, err := getDataReader(pkgReader)
if err != nil {
return err
}

oldUmask := syscall.Umask(0)
defer func() {
Expand All @@ -136,6 +149,12 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
}
}

// A mapping from the base file path to all the hard links that are pending
// We will put the entry only if the base file does not exist
// We will do the second pass if this map is not empty
// TODO we need both the modes and the extractInfo here
pendingHardlinks := make(map[string][]PendingHardlink)

// When creating a file we will iterate through its parent directories and
// create them with the permissions defined in the tarball.
//
Expand Down Expand Up @@ -261,10 +280,37 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
}
err := options.Create(extractInfos, createOptions)
if err != nil {
return err
// Handles the hardlink where its counterpart is not extracted
if tarHeader.Typeflag == tar.TypeLink && strings.HasPrefix(err.Error(), "link target does not exist") {
basePath := sanitizePath(tarHeader.Linkname)
pendingHardlinks[basePath] = append(pendingHardlinks[basePath],
PendingHardlink{
TargetPath: targetPath,
ExtractInfos: extractInfos,
})
pendingPaths[basePath] = true
} else {
return err
}
}
}
}
// helperfunction()

// Second pass to create hard links
if len(pendingHardlinks) > 0 {
pkgReader.Seek(0, io.SeekStart)
dataReader, err = getDataReader(pkgReader)
if err != nil {
return err
}
tarReader := tar.NewReader(dataReader)
err = handlePendingHardlinks(options, pendingHardlinks, tarReader, &pendingPaths)
if err != nil {
return err
}

}

if len(pendingPaths) > 0 {
pendingList := make([]string, 0, len(pendingPaths))
Expand All @@ -279,6 +325,60 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
}
}

dataReader.Close()

return nil
}

// handlePendingHardlinks walks the data tarball a second time and creates the
// hard links whose base file was not available during the first pass.
//
// pendingHardlinks maps the sanitized path of a base file to the links that
// are waiting for it. For every tar entry whose sanitized path is a key of
// that map:
//
//   - the first pending link is handed to options.Create with the entry's
//     content as Data (no Link), so it becomes the file that carries the
//     content;
//   - every remaining pending link is handed to options.Create with Link
//     pointing at that first file;
//   - the base path is removed from *pendingPaths.
//
// The permission bits of the created paths are taken from the base tar entry,
// so that executables keep their mode instead of being forced to 0644.
//
// tarReader must be positioned at the beginning of the data payload. The
// first error returned by tarReader or by options.Create is returned as is.
//
// NOTE(review): a base entry that is not a regular file (for example a
// symlink) is still passed to Create as a regular file with Data set; confirm
// whether that case needs dedicated handling.
func handlePendingHardlinks(options *ExtractOptions, pendingHardlinks map[string][]PendingHardlink,
	tarReader *tar.Reader, pendingPaths *map[string]bool) error {
	for {
		tarHeader, err := tarReader.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		sourcePath := sanitizePath(tarHeader.Name)
		if sourcePath == "" {
			continue
		}

		// A missing key and an empty slice are both "nothing to do"; checking
		// the length avoids indexing into an empty slice below.
		hardlinks := pendingHardlinks[sourcePath]
		if len(hardlinks) == 0 {
			continue
		}

		// Only the permission bits are reused: the type bits stay zero so the
		// path is created as a regular file or hard link.
		perm := tarHeader.FileInfo().Mode().Perm()

		// The first pending link receives the content of the base entry.
		contentPath := filepath.Join(options.TargetDir, hardlinks[0].TargetPath)
		err = options.Create(hardlinks[0].ExtractInfos, &fsutil.CreateOptions{
			Path: contentPath,
			Mode: perm,
			Data: tarReader,
		})
		if err != nil {
			return err
		}
		delete(*pendingPaths, sourcePath)

		// All other pending links are linked to the file created above.
		for _, hardlink := range hardlinks[1:] {
			err = options.Create(hardlink.ExtractInfos, &fsutil.CreateOptions{
				Path: filepath.Join(options.TargetDir, hardlink.TargetPath),
				Mode: perm,
				Link: contentPath,
			})
			if err != nil {
				return err
			}
		}
	}
}

Expand All @@ -294,3 +394,11 @@ func parentDirs(path string) []string {
}
return parents
}

// sanitizePath converts a tarball entry name of the form "./some/path" into
// the absolute form "/some/path" by dropping the leading dot.
//
// Names that do not start with "./", or that have nothing after it (fewer
// than three bytes in total), yield the empty string.
func sanitizePath(path string) string {
	if len(path) >= 3 && path[:2] == "./" {
		return path[1:]
	}
	return ""
}
6 changes: 3 additions & 3 deletions internal/deb/extract_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ var extractTests = []extractTest{{
}},
},
},
error: `cannot extract from package "test-package": link target does not exist: \/[^ ]*\/non-existing-target`,
error: `cannot extract from package "test-package": no content at \/non-existing-target`,
}, {
summary: "Hard link to symlink does not follow symlink",
pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{
Expand Down Expand Up @@ -433,7 +433,7 @@ func (s *S) TestExtract(c *C) {
test.hackopt(&options)
}

err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options)
err := deb.Extract(bytes.NewReader(test.pkgdata), &options)
if test.error != "" {
c.Assert(err, ErrorMatches, test.error)
continue
Expand Down Expand Up @@ -544,7 +544,7 @@ func (s *S) TestExtractCreateCallback(c *C) {
return nil
}

err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options)
err := deb.Extract(bytes.NewReader(test.pkgdata), &options)
c.Assert(err, IsNil)

c.Assert(createExtractInfos, DeepEquals, test.calls)
Expand Down
71 changes: 51 additions & 20 deletions internal/slicer/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ import (
)

type ReportEntry struct {
Path string
Mode fs.FileMode
Hash string
Size int
Slices map[*setup.Slice]bool
Link string
FinalHash string
Path string
Mode fs.FileMode
Hash string
Size int
Slices map[*setup.Slice]bool
Link string
FinalHash string
HardLinkId int
}

// Report holds the information about files and directories created when slicing
Expand All @@ -26,7 +27,8 @@ type Report struct {
// Root is the filesystem path where the all reported content is based.
Root string
// Entries holds all reported content, indexed by their path.
Entries map[string]ReportEntry
Entries map[string]ReportEntry
currHardLinkId int
}

// NewReport returns an empty report for content that will be based at the
Expand All @@ -48,26 +50,55 @@ func (r *Report) Add(slice *setup.Slice, fsEntry *fsutil.Entry) error {
return fmt.Errorf("cannot add path to report: %s", err)
}

// Handle the hard link group
hardLinkId := 0
hash := fsEntry.Hash
size := fsEntry.Size
link := fsEntry.Link
if fsEntry.Link != "" {
// Having the link target in root is a necessary but insufficient condition for a hardlink.
if strings.HasPrefix(fsEntry.Link, r.Root) {
relLinkPath, _ := r.sanitizeAbsPath(fsEntry.Link, false)
// With this, a hardlink is found
if entry, ok := r.Entries[relLinkPath]; ok {
if entry.HardLinkId == 0 {
r.currHardLinkId++
entry.HardLinkId = r.currHardLinkId
r.Entries[relLinkPath] = entry
}
hardLinkId = entry.HardLinkId
if fsEntry.Mode.IsRegular() { // If the hardlink links to a regular file
hash = entry.Hash
size = entry.Size
link = relLinkPath
} else { // If the hardlink links to a symlink
link = entry.Link
}
} // else, this is a symlink
} // else, this is a symlink
}

if entry, ok := r.Entries[relPath]; ok {
if fsEntry.Mode != entry.Mode {
return fmt.Errorf("path %s reported twice with diverging mode: 0%03o != 0%03o", relPath, fsEntry.Mode, entry.Mode)
} else if fsEntry.Link != entry.Link {
return fmt.Errorf("path %s reported twice with diverging link: %q != %q", relPath, fsEntry.Link, entry.Link)
} else if fsEntry.Size != entry.Size {
return fmt.Errorf("path %s reported twice with diverging size: %d != %d", relPath, fsEntry.Size, entry.Size)
} else if fsEntry.Hash != entry.Hash {
return fmt.Errorf("path %s reported twice with diverging hash: %q != %q", relPath, fsEntry.Hash, entry.Hash)
} else if link != entry.Link {
return fmt.Errorf("path %s reported twice with diverging link: %q != %q", relPath, link, entry.Link)
} else if size != entry.Size {
return fmt.Errorf("path %s reported twice with diverging size: %d != %d", relPath, size, entry.Size)
} else if hash != entry.Hash {
return fmt.Errorf("path %s reported twice with diverging hash: %q != %q", relPath, hash, entry.Hash)
}
entry.Slices[slice] = true
r.Entries[relPath] = entry
} else {
r.Entries[relPath] = ReportEntry{
Path: relPath,
Mode: fsEntry.Mode,
Hash: fsEntry.Hash,
Size: fsEntry.Size,
Slices: map[*setup.Slice]bool{slice: true},
Link: fsEntry.Link,
Path: relPath,
Mode: fsEntry.Mode,
Hash: hash,
Size: size,
Slices: map[*setup.Slice]bool{slice: true},
Link: link,
HardLinkId: hardLinkId,
}
}
return nil
Expand Down
Loading

0 comments on commit 2a56478

Please sign in to comment.