feat: add support for hard links (#179)
Hard links can now be extracted and they are correctly reflected in the manifest.
letFunny authored Jan 10, 2025
1 parent b325dbe commit 9d9d214
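For context, a hard link in a tar archive carries no data of its own: it is stored as a header with type tar.TypeLink whose Linkname field names an earlier entry that holds the actual content. A minimal standalone sketch of spotting such entries (the file name is hypothetical; the real extraction code below works on the compressed data members of a .deb):

package main

import (
	"archive/tar"
	"fmt"
	"io"
	"log"
	"os"
)

func main() {
	// Open an uncompressed data member extracted from a .deb (hypothetical name).
	f, err := os.Open("data.tar")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	tr := tar.NewReader(f)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		if hdr.Typeflag == tar.TypeLink {
			// The content lives only in the entry named by Linkname.
			fmt.Printf("%s is a hard link to %s\n", hdr.Name, hdr.Linkname)
		}
	}
}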
Showing 17 changed files with 1,093 additions and 126 deletions.
6 changes: 3 additions & 3 deletions internal/archive/archive.go
@@ -18,7 +18,7 @@ import (

type Archive interface {
Options() *Options
Fetch(pkg string) (io.ReadCloser, *PackageInfo, error)
Fetch(pkg string) (io.ReadSeekCloser, *PackageInfo, error)
Exists(pkg string) bool
Info(pkg string) (*PackageInfo, error)
}
@@ -123,7 +123,7 @@ func (a *ubuntuArchive) selectPackage(pkg string) (control.Section, *ubuntuIndex
return selectedSection, selectedIndex, nil
}

func (a *ubuntuArchive) Fetch(pkg string) (io.ReadCloser, *PackageInfo, error) {
func (a *ubuntuArchive) Fetch(pkg string) (io.ReadSeekCloser, *PackageInfo, error) {
section, index, err := a.selectPackage(pkg)
if err != nil {
return nil, nil, err
@@ -363,7 +363,7 @@ func (index *ubuntuIndex) checkComponents(components []string) error {
return nil
}

func (index *ubuntuIndex) fetch(suffix, digest string, flags fetchFlags) (io.ReadCloser, error) {
func (index *ubuntuIndex) fetch(suffix, digest string, flags fetchFlags) (io.ReadSeekCloser, error) {
reader, err := index.archive.cache.Open(digest)
if err == nil {
return reader, nil
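The signature changes above are what make a second pass over the package possible: an io.ReadCloser can only be consumed once, while an io.ReadSeekCloser can be rewound. A hedged sketch of the pattern the extractor relies on (twoPass is a hypothetical helper, not part of this change):

package deb // hypothetical package, for illustration only

import "io"

// twoPass consumes a package stream once, rewinds it, and consumes it
// again, which is why Fetch now returns an io.ReadSeekCloser instead of
// an io.ReadCloser.
func twoPass(r io.ReadSeekCloser, pass func(io.Reader) error) error {
	if err := pass(r); err != nil {
		return err
	}
	// Rewind to the start of the .deb before the second pass.
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return err
	}
	return pass(r)
}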
2 changes: 1 addition & 1 deletion internal/cache/cache.go
@@ -132,7 +132,7 @@ func (c *Cache) Write(digest string, data []byte) error {
return err2
}

func (c *Cache) Open(digest string) (io.ReadCloser, error) {
func (c *Cache) Open(digest string) (io.ReadSeekCloser, error) {
if c.Dir == "" || digest == "" {
return nil, MissErr
}
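The cache can satisfy the wider interface without any buffering because it serves content from disk: an *os.File is already an io.ReadSeekCloser. A rough sketch of the shape of the change (openDigest and the directory layout are illustrative, not the actual implementation):

package cache // illustrative sketch only

import (
	"io"
	"os"
	"path/filepath"
)

// openDigest stands in for Cache.Open: opening a cached file on disk
// yields an *os.File, which implements Read, Seek, and Close.
func openDigest(dir, digest string) (io.ReadSeekCloser, error) {
	return os.Open(filepath.Join(dir, digest))
}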
238 changes: 194 additions & 44 deletions internal/deb/extract.go
@@ -62,7 +62,7 @@ func getValidOptions(options *ExtractOptions) (*ExtractOptions, error) {
return options, nil
}

func Extract(pkgReader io.Reader, options *ExtractOptions) (err error) {
func Extract(pkgReader io.ReadSeeker, options *ExtractOptions) (err error) {
defer func() {
if err != nil {
err = fmt.Errorf("cannot extract from package %q: %w", options.Package, err)
@@ -83,43 +83,15 @@ func Extract(pkgReader io.Reader, options *ExtractOptions) (err error) {
return err
}

arReader := ar.NewReader(pkgReader)
var dataReader io.Reader
for dataReader == nil {
arHeader, err := arReader.Next()
if err == io.EOF {
return fmt.Errorf("no data payload")
}
if err != nil {
return err
}
switch arHeader.Name {
case "data.tar.gz":
gzipReader, err := gzip.NewReader(arReader)
if err != nil {
return err
}
defer gzipReader.Close()
dataReader = gzipReader
case "data.tar.xz":
xzReader, err := xz.NewReader(arReader)
if err != nil {
return err
}
dataReader = xzReader
case "data.tar.zst":
zstdReader, err := zstd.NewReader(arReader)
if err != nil {
return err
}
defer zstdReader.Close()
dataReader = zstdReader
}
}
return extractData(dataReader, validOpts)
return extractData(pkgReader, validOpts)
}

func extractData(dataReader io.Reader, options *ExtractOptions) error {
func extractData(pkgReader io.ReadSeeker, options *ExtractOptions) error {
dataReader, err := getDataReader(pkgReader)
if err != nil {
return err
}
defer dataReader.Close()

oldUmask := syscall.Umask(0)
defer func() {
@@ -136,6 +108,15 @@ }
}
}

// Store the hard links that we cannot extract when we first iterate over
// the tarball.
//
// This happens because the tarball stores the content only once, in the
// first entry, and the remaining entries point back to it. Therefore, we
// cannot tell whether the content needs to be extracted until we reach a
// hard link, in which case we need a second pass.
pendingHardLinks := make(map[string][]pendingHardLink)

// When creating a file we will iterate through its parent directories and
// create them with the permissions defined in the tarball.
//
@@ -153,12 +134,8 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
return err
}

sourcePath := tarHeader.Name
if len(sourcePath) < 3 || sourcePath[0] != '.' || sourcePath[1] != '/' {
continue
}
sourcePath = sourcePath[1:]
if sourcePath == "" {
sourcePath, ok := sanitizeTarPath(tarHeader.Name)
if !ok {
continue
}

@@ -245,22 +222,64 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
return err
}
}
link := tarHeader.Linkname
if tarHeader.Typeflag == tar.TypeLink {
// A hard link requires the real path of the target file.
// TODO This path must be sanitized to prevent a crafted
// tarball from referring to content outside the target
// directory. The issue also affects targetPath, but it is more
// relevant here because a hard link imports external content
// into the target directory. It is mitigated by the fact that
// these tarballs are signed official packages, but it must be
// fixed regardless.
link = filepath.Join(options.TargetDir, link)
}

// Create the entry itself.
createOptions := &fsutil.CreateOptions{
Path: filepath.Join(options.TargetDir, targetPath),
Mode: tarHeader.FileInfo().Mode(),
Data: pathReader,
Link: tarHeader.Linkname,
Link: link,
MakeParents: true,
OverrideMode: true,
}
err := options.Create(extractInfos, createOptions)
if err != nil {
if err != nil && os.IsNotExist(err) && tarHeader.Typeflag == tar.TypeLink {
// The hard link could not be created because the content
// was not extracted previously. Add this hard link entry
// to the pending list to extract later.
relLinkPath, ok := sanitizeTarPath(tarHeader.Linkname)
if !ok {
return fmt.Errorf("invalid link target %s", tarHeader.Linkname)
}
info := pendingHardLink{
path: targetPath,
extractInfos: extractInfos,
}
pendingHardLinks[relLinkPath] = append(pendingHardLinks[relLinkPath], info)
} else if err != nil {
return err
}
}
}

if len(pendingHardLinks) > 0 {
// Go over the tarball again to extract the pending hard links.
extractHardLinkOptions := &extractHardLinkOptions{
ExtractOptions: options,
pendingLinks: pendingHardLinks,
}
_, err := pkgReader.Seek(0, io.SeekStart)
if err != nil {
return err
}
err = extractHardLinks(pkgReader, extractHardLinkOptions)
if err != nil {
return err
}
}

if len(pendingPaths) > 0 {
pendingList := make([]string, 0, len(pendingPaths))
for pendingPath := range pendingPaths {
@@ -277,6 +296,128 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error {
return nil
}

type pendingHardLink struct {
path string
extractInfos []ExtractInfo
}

type extractHardLinkOptions struct {
*ExtractOptions
pendingLinks map[string][]pendingHardLink
}

// extractHardLinks iterates through the tarball a second time to extract the
// hard links that were not extracted in the first pass.
func extractHardLinks(pkgReader io.ReadSeeker, opts *extractHardLinkOptions) error {
dataReader, err := getDataReader(pkgReader)
if err != nil {
return err
}
defer dataReader.Close()

tarReader := tar.NewReader(dataReader)
for {
tarHeader, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
return err
}

sourcePath, ok := sanitizeTarPath(tarHeader.Name)
if !ok {
continue
}

links := opts.pendingLinks[sourcePath]
if len(links) == 0 {
continue
}

// For a target path, the first hard link will be created as a file with
// the content of the target path. If there are more pending hard links,
// the remaining ones will be created as hard links with the newly
// created file as their target.
absLink := filepath.Join(opts.TargetDir, links[0].path)
// Extract the content to the first hard link path.
createOptions := &fsutil.CreateOptions{
Path: absLink,
Mode: tarHeader.FileInfo().Mode(),
Data: tarReader,
}
err = opts.Create(links[0].extractInfos, createOptions)
if err != nil {
return err
}

// Create the remaining hard links.
for _, link := range links[1:] {
createOptions := &fsutil.CreateOptions{
Path: filepath.Join(opts.TargetDir, link.path),
Mode: tarHeader.FileInfo().Mode(),
// Link to the first file extracted for the hard links.
Link: absLink,
}
err := opts.Create(link.extractInfos, createOptions)
if err != nil {
return err
}
}
delete(opts.pendingLinks, sourcePath)
}

// If there are pending links, that means the link targets do not come from
// this package.
if len(opts.pendingLinks) > 0 {
var targets []string
for target := range opts.pendingLinks {
targets = append(targets, target)
}
sort.Strings(targets)
link := opts.pendingLinks[targets[0]][0]
return fmt.Errorf("cannot create hard link %s: no content at %s", link.path, targets[0])
}

return nil
}

func getDataReader(pkgReader io.ReadSeeker) (io.ReadCloser, error) {
arReader := ar.NewReader(pkgReader)
var dataReader io.ReadCloser
for dataReader == nil {
arHeader, err := arReader.Next()
if err == io.EOF {
return nil, fmt.Errorf("no data payload")
}
if err != nil {
return nil, err
}
switch arHeader.Name {
case "data.tar.gz":
gzipReader, err := gzip.NewReader(arReader)
if err != nil {
return nil, err
}
dataReader = gzipReader
case "data.tar.xz":
xzReader, err := xz.NewReader(arReader)
if err != nil {
return nil, err
}
dataReader = io.NopCloser(xzReader)
case "data.tar.zst":
zstdReader, err := zstd.NewReader(arReader)
if err != nil {
return nil, err
}
dataReader = zstdReader.IOReadCloser()
}
}

return dataReader, nil
}

func parentDirs(path string) []string {
path = filepath.Clean(path)
parents := make([]string, strings.Count(path, "/"))
@@ -289,3 +430,12 @@ func parentDirs(path string) []string {
}
return parents
}

// sanitizeTarPath verifies that the source path in the tarball begins with
// "./" and is not empty, and returns it with the leading "." stripped, so
// that "./a/b" becomes "/a/b".
func sanitizeTarPath(path string) (string, bool) {
if len(path) < 3 || path[0] != '.' || path[1] != '/' {
return "", false
}
return path[1:], true
}
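The pending-link mechanism above hinges on hard link creation failing with a "not exist" error when the link target has not been extracted yet, which is what the os.IsNotExist check in the first pass detects. A self-contained sketch of that behaviour (the paths are hypothetical):

package main

import (
	"fmt"
	"os"
)

func main() {
	// Linking to a path that was never created fails with ENOENT, which
	// is what routes a tar entry into the pendingHardLinks map.
	err := os.Link("/tmp/never-extracted", "/tmp/example-link")
	fmt.Println(os.IsNotExist(err)) // prints true when the target is missing
}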