Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hardlinks are dereferenced in generated archives #1896

Merged
merged 6 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion pkg/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@
}
}

hardLinkFiles := map[uint64]string{}
return filepath.Walk(srcDir, func(file string, fi os.FileInfo, err error) error {
var relPath string
if fileFilter != nil {
Expand Down Expand Up @@ -218,12 +219,16 @@
}

header.Name = getHeaderNameFromBaseAndRelPath(basePath, relPath)
if err = processHardLinks(file, fi, hardLinkFiles, header); err != nil {
return err
}

Check warning on line 224 in pkg/archive/archive.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive.go#L223-L224

Added lines #L223 - L224 were not covered by tests

err = writeHeader(header, uid, gid, mode, normalizeModTime, tw)
if err != nil {
return err
}

if hasRegularMode(fi) {
if hasRegularMode(fi) && header.Size > 0 {
f, err := os.Open(filepath.Clean(file))
if err != nil {
return err
Expand All @@ -239,6 +244,35 @@
})
}

// processHardLinks determines whether the given file has hard links associated with it. The hardLinkFiles map keeps
// track of the hard-linked files that were already processed: it is keyed by the value returned by getInodeFromStat
// and stores the header name that was recorded for the first entry seen with that key.
//
// When the key of the given file is already present in the map, header is rewritten in place as a tar.TypeLink entry
// whose Linkname is the previously recorded name and whose Size is 0. Otherwise header.Name is recorded in the map and
// header is left untouched. A file for which hasHardlinks reports false changes neither the map nor the header.
//
// Any error returned by hasHardlinks or getInodeFromStat is returned unchanged.
func processHardLinks(file string, fi os.FileInfo, hardLinkFiles map[uint64]string, header *tar.Header) error {
var (
err error
hardlinks bool
inode uint64
)
if hardlinks, err = hasHardlinks(fi, file); err != nil {
return err
}

Check warning on line 258 in pkg/archive/archive.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive.go#L257-L258

Added lines #L257 - L258 were not covered by tests
if hardlinks {
inode, err = getInodeFromStat(fi.Sys(), file)
if err != nil {
return err
}

Check warning on line 263 in pkg/archive/archive.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive.go#L262-L263

Added lines #L262 - L263 were not covered by tests

// The same key was recorded earlier: turn this entry into a link that points at the first entry instead of
// describing a second copy of the file.
if processedPath, ok := hardLinkFiles[inode]; ok {
header.Typeflag = tar.TypeLink
header.Linkname = processedPath
header.Size = 0
} else {
// First time this key is seen: remember the archive name so later entries can link to it.
hardLinkFiles[inode] = header.Name
}
}
return nil
}

// WriteZipToTar writes the contents of a zip file to a tar writer.
func WriteZipToTar(tw TarWriter, srcZip, basePath string, uid, gid int, mode int64, normalizeModTime bool, fileFilter func(string) bool) error {
zipReader, err := zip.OpenReader(srcZip)
Expand Down
36 changes: 35 additions & 1 deletion pkg/archive/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ func testArchive(t *testing.T, when spec.G, it spec.S) {

it.After(func() {
if err := os.RemoveAll(tmpDir); err != nil {
t.Fatalf("failed to clean up tmp dir %s: %s", tmpDir, err)
if runtime.GOOS != "windows" {
// skip "The process cannot access the file because it is being used by another process" on windows
t.Fatalf("failed to clean up tmp dir %s: %s", tmpDir, err)
}
}
})

Expand Down Expand Up @@ -442,6 +445,37 @@ func testArchive(t *testing.T, when spec.G, it spec.S) {
})
})
})

when("hard link files are present", func() {
it.Before(func() {
src = filepath.Join("testdata", "dir-to-tar-with-hardlink")
// create a hard link
err := os.Link(filepath.Join(src, "original-file"), filepath.Join(src, "original-file-2"))
h.AssertNil(t, err)
})

it.After(func() {
os.RemoveAll(filepath.Join(src, "original-file-2"))
})

it("tar file file doesn't include duplicated data", func() {
outputFilename := filepath.Join(tmpDir, "file-with-hard-links.tar")
fh, err := os.Create(outputFilename)
h.AssertNil(t, err)

tw := tar.NewWriter(fh)
err = archive.WriteDirToTar(tw, src, "/nested/dir", 1234, 2345, 0777, true, false, nil)

h.AssertNil(t, err)
h.AssertNil(t, tw.Close())
h.AssertNil(t, fh.Close())
h.AssertOnTarEntries(t, outputFilename,
"/nested/dir/original-file",
"/nested/dir/original-file-2",
h.AreEquivalentHardLinks(),
)
})
})
})

when("#WriteZipToTar", func() {
Expand Down
22 changes: 22 additions & 0 deletions pkg/archive/archive_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//go:build linux || darwin

package archive

import (
"os"
"syscall"
)

// hasHardlinks reports whether the given file has more than one hard link,
// i.e. whether its link count (Nlink) is greater than 1.
//
// The link count is read from the *syscall.Stat_t carried by fi.Sys(). When
// fi.Sys() holds nil or any other type, the link count is unknown and the file
// is reported as having no hard links rather than panicking on the type
// assertion.
//
// path is not used by this implementation and the returned error is always
// nil; both are part of the signature so that it matches the windows
// implementation of the same function.
func hasHardlinks(fi os.FileInfo, path string) (bool, error) {
	stat, ok := fi.Sys().(*syscall.Stat_t)
	if !ok {
		// Without a Stat_t there is no link count to inspect.
		return false, nil
	}
	return stat.Nlink > 1, nil
}

// getInodeFromStat returns the inode (index node) value associated with the
// given file.
//
// stat is expected to be the *syscall.Stat_t obtained from os.FileInfo.Sys().
// When it is nil or of any other type, an *os.PathError wrapping os.ErrInvalid
// is returned for path instead of silently reporting inode 0: a zero inode
// with a nil error would make every such file look like the same file to a
// caller that uses the inode as a lookup key.
func getInodeFromStat(stat interface{}, path string) (inode uint64, err error) {
	s, ok := stat.(*syscall.Stat_t)
	if !ok {
		return 0, &os.PathError{Op: "stat", Path: path, Err: os.ErrInvalid}
	}
	return uint64(s.Ino), nil
}
68 changes: 68 additions & 0 deletions pkg/archive/archive_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//go:build windows

package archive

import (
"os"
"syscall"

"golang.org/x/sys/windows"
)

// hasHardlinks returns true if the given file has hard-links associated with it, that is, when its NumberOfLinks
// value is greater than 1.
//
// NumberOfLinks is taken from fi.Sys() when that value is a *syscall.ByHandleFileInformation. For any other type the
// file at path is queried through open; if open fails, false and the error from open are returned.
func hasHardlinks(fi os.FileInfo, path string) (bool, error) {
var numberOfLinks uint32
switch v := fi.Sys().(type) {
case *syscall.ByHandleFileInformation:
numberOfLinks = v.NumberOfLinks

Check warning on line 17 in pkg/archive/archive_windows.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive_windows.go#L16-L17

Added lines #L16 - L17 were not covered by tests
default:
// We need an instance of a ByHandleFileInformation to read NumberOfLinks
info, err := open(path)
if err != nil {
return false, err
}

Check warning on line 23 in pkg/archive/archive_windows.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive_windows.go#L22-L23

Added lines #L22 - L23 were not covered by tests
numberOfLinks = info.NumberOfLinks
}
return numberOfLinks > 1, nil
}

// getInodeFromStat returns an equivalent representation of unix inode on windows based on FileIndexHigh and
// FileIndexLow values: FileIndexHigh is shifted into the upper 32 bits of the result and FileIndexLow is OR-ed into
// the lower bits.
//
// The two values are read from stat when it is a *syscall.ByHandleFileInformation. For any other type the file at
// path is queried through open instead; if open fails, its error is returned and inode keeps its zero value.
func getInodeFromStat(stat interface{}, path string) (inode uint64, err error) {
s, ok := stat.(*syscall.ByHandleFileInformation)
if ok {
inode = (uint64(s.FileIndexHigh) << 32) | uint64(s.FileIndexLow)

Check warning on line 33 in pkg/archive/archive_windows.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive_windows.go#L33

Added line #L33 was not covered by tests
} else {
// stat did not carry the file index: fetch the information from the file itself.
s, err = open(path)
if err == nil {
inode = (uint64(s.FileIndexHigh) << 32) | uint64(s.FileIndexLow)
}
}
return
}

// open returns a ByHandleFileInformation object representation of the given file.
//
// The path is converted to a UTF-16 pointer, a handle to the existing file is obtained with syscall.CreateFile, the
// information is read with syscall.GetFileInformationByHandle, and the handle is closed before returning. An error
// from any of these steps is returned together with a nil result.
func open(path string) (*syscall.ByHandleFileInformation, error) {
fPath, err := syscall.UTF16PtrFromString(path)
if err != nil {
return nil, err
}

Check warning on line 48 in pkg/archive/archive_windows.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive_windows.go#L47-L48

Added lines #L47 - L48 were not covered by tests

// Only attribute access is requested, the file must already exist (OPEN_EXISTING), and read, write and delete
// sharing are all allowed.
// NOTE(review): FILE_FLAG_BACKUP_SEMANTICS is presumably passed so that directories can be opened as well —
// confirm against the CreateFile documentation.
handle, err := syscall.CreateFile(
fPath,
windows.FILE_READ_ATTRIBUTES,
syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE,
nil,
syscall.OPEN_EXISTING,
syscall.FILE_FLAG_BACKUP_SEMANTICS,
0)
if err != nil {
return nil, err
}

Check warning on line 60 in pkg/archive/archive_windows.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive_windows.go#L59-L60

Added lines #L59 - L60 were not covered by tests
// The handle is released on every return path below; the result of CloseHandle is not checked.
defer syscall.CloseHandle(handle)

var info syscall.ByHandleFileInformation
if err = syscall.GetFileInformationByHandle(handle, &info); err != nil {
return nil, err
}

Check warning on line 66 in pkg/archive/archive_windows.go

View check run for this annotation

Codecov / codecov/patch

pkg/archive/archive_windows.go#L65-L66

Added lines #L65 - L66 were not covered by tests
return &info, nil
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
foo
4 changes: 4 additions & 0 deletions pkg/buildpack/buildpack.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,10 @@ func toDistTar(tw archive.TarWriter, descriptor Descriptor, blob Blob) error {

header.Mode = calcFileMode(header)
header.Name = path.Join(baseTarDir, header.Name)

if header.Typeflag == tar.TypeLink {
header.Linkname = path.Join(baseTarDir, path.Clean(header.Linkname))
}
err = tw.WriteHeader(header)
if err != nil {
return errors.Wrapf(err, "failed to write header for '%s'", header.Name)
Expand Down
7 changes: 1 addition & 6 deletions pkg/buildpack/buildpack_tar_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,7 @@ func (b *BuildModuleWriter) writeBuildModuleToTar(tw archive.TarWriter, module B
return errors.Wrapf(err, "failed to write header for '%s'", header.Name)
}

buf, err := io.ReadAll(tr)
if err != nil {
return errors.Wrapf(err, "failed to read contents of '%s'", header.Name)
}

_, err = tw.Write(buf)
_, err = io.Copy(tw, tr)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed this line could throw the same error reported here

if err != nil {
return errors.Wrapf(err, "failed to write contents to '%s'", header.Name)
}
Expand Down
33 changes: 33 additions & 0 deletions pkg/buildpack/buildpack_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/sclevine/spec/report"

"github.com/buildpacks/pack/pkg/archive"
"github.com/buildpacks/pack/pkg/blob"
"github.com/buildpacks/pack/pkg/buildpack"
"github.com/buildpacks/pack/pkg/dist"
h "github.com/buildpacks/pack/testhelpers"
Expand Down Expand Up @@ -511,6 +512,38 @@ version = "1.2.3"
h.AssertNil(t, err)
})
})

when("hardlink is present", func() {
var bpRootFolder string

it.Before(func() {
bpRootFolder = filepath.Join("testdata", "buildpack-with-hardlink")
// create a hard link
err := os.Link(filepath.Join(bpRootFolder, "original-file"), filepath.Join(bpRootFolder, "original-file-2"))
h.AssertNil(t, err)
})

it.After(func() {
os.RemoveAll(filepath.Join(bpRootFolder, "original-file-2"))
})

it("hardlink is preserved in the output tar file", func() {
bp, err := buildpack.FromBuildpackRootBlob(
blob.NewBlob(bpRootFolder),
archive.DefaultTarWriterFactory(),
)
h.AssertNil(t, err)

tarPath := writeBlobToFile(bp)
defer os.Remove(tarPath)

h.AssertOnTarEntries(t, tarPath,
"/cnb/buildpacks/bp.one/1.2.3/original-file",
"/cnb/buildpacks/bp.one/1.2.3/original-file-2",
h.AreEquivalentHardLinks(),
)
})
})
})

when("#Match", func() {
Expand Down
1 change: 1 addition & 0 deletions pkg/buildpack/testdata/buildpack-with-hardlink/bin/build
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
build-contents
Empty file.
10 changes: 10 additions & 0 deletions pkg/buildpack/testdata/buildpack-with-hardlink/buildpack.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
api = "0.3"

[buildpack]
id = "bp.one"
version = "1.2.3"
homepage = "http://one.buildpack"

[[stacks]]
id = "some.stack.id"
mixins = ["mixinX", "build:mixinY", "run:mixinZ"]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
foo
36 changes: 36 additions & 0 deletions testhelpers/tar_assertions.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ var gzipMagicHeader = []byte{'\x1f', '\x8b'}

type TarEntryAssertion func(t *testing.T, header *tar.Header, data []byte)

// TarEntriesAssertion is an assertion over a pair of tar entries: it receives the header and the data of the first
// entry followed by the header and the data of the second entry.
type TarEntriesAssertion func(t *testing.T, header1 *tar.Header, data1 []byte, header2 *tar.Header, data2 []byte)

func AssertOnTarEntry(t *testing.T, tarPath, entryPath string, assertFns ...TarEntryAssertion) {
t.Helper()

Expand Down Expand Up @@ -48,6 +50,27 @@ func AssertOnNestedTar(nestedEntryPath string, assertions ...TarEntryAssertion)
}
}

// AssertOnTarEntries looks up the entries entryPath1 and entryPath2 in the tar
// file at tarPath and runs every assertion in assertFns against the pair.
//
// Each entry is fetched with readTarFileEntry; the file offset is reset to the
// start of the file before the second lookup. Errors from opening the file,
// from either lookup and from the rewind are all handed to AssertNil.
func AssertOnTarEntries(t *testing.T, tarPath string, entryPath1, entryPath2 string, assertFns ...TarEntriesAssertion) {
	t.Helper()

	archiveFile, openErr := os.Open(filepath.Clean(tarPath))
	AssertNil(t, openErr)
	defer archiveFile.Close()

	firstHeader, firstData, firstErr := readTarFileEntry(archiveFile, entryPath1)
	AssertNil(t, firstErr)

	// Rewind before searching for the second entry.
	_, seekErr := archiveFile.Seek(0, io.SeekStart)
	AssertNil(t, seekErr)

	secondHeader, secondData, secondErr := readTarFileEntry(archiveFile, entryPath2)
	AssertNil(t, secondErr)

	for i := range assertFns {
		assertFns[i](t, firstHeader, firstData, secondHeader, secondData)
	}
}

func readTarFileEntry(reader io.Reader, entryPath string) (*tar.Header, []byte, error) {
var (
gzipReader *gzip.Reader
Expand Down Expand Up @@ -113,6 +136,19 @@ func SymlinksTo(expectedTarget string) TarEntryAssertion {
}
}

// AreEquivalentHardLinks returns a TarEntriesAssertion that fails the test unless both of the following hold for the
// two entries: at least one of them has the type flag tar.TypeLink, and the Linkname of at least one of them equals
// the Name of the other. The entry data is ignored.
//
// The two conditions are checked independently: the assertion does not verify that the entry carrying tar.TypeLink
// is the same entry whose Linkname matches.
func AreEquivalentHardLinks() TarEntriesAssertion {
return func(t *testing.T, header1 *tar.Header, _ []byte, header2 *tar.Header, _ []byte) {
t.Helper()
// Fails only when neither entry is a link.
if header1.Typeflag != tar.TypeLink && header2.Typeflag != tar.TypeLink {
natalieparellano marked this conversation as resolved.
Show resolved Hide resolved
t.Fatalf("path '%s' and '%s' are not hardlinks, type flags are '%c' and '%c'", header1.Name, header2.Name, header1.Typeflag, header2.Typeflag)
}

// Fails only when neither entry names the other as its link target.
if header1.Linkname != header2.Name && header2.Linkname != header1.Name {
t.Fatalf("'%s' and '%s' are not the same file", header1.Name, header2.Name)
}
}
}

func HasOwnerAndGroup(expectedUID int, expectedGID int) TarEntryAssertion {
return func(t *testing.T, header *tar.Header, _ []byte) {
t.Helper()
Expand Down
Loading