Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support local links to other files in the same (new) version during upload. #12

Merged
merged 5 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ This contains a JSON object with the following properties:
When creating a new version of a project's assets, the Gobbler will attempt deduplication based on the file size and MD5 checksum.
Specifically, it will inspect the immediate previous version of the asset to see if any other files have a matching size/checksum.
If so, it will create a symbolic link to the file in the previous version rather than wasting disk space with a redundant copy.
Users can also directly instruct the Gobbler to create links by supplying symlinks to existing files in the registry.
Users can also directly instruct the Gobbler to create links by supplying symlinks during upload,
either to existing files in the registry or to other files in the same to-be-uploaded version of the asset.

Any "linked-from" files (i.e., those identified as copies of other existing files) will be present as symbolic links in the registry.
The existence of linked-from files can also be determined from the `..manifest` file for each project-asset-version;
Expand Down Expand Up @@ -188,12 +189,14 @@ On success, a new project is created with the designated permissions and a JSON
### Uploads and updates

To upload a new version of an asset of a project, users should create a temporary directory within the staging directory.
The directory may have any name but should avoid starting with `request-`.
The temporary directory may have any name but should avoid starting with `request-`.
Files within this temporary directory will be transferred to the appropriate subdirectory within the registry, subject to the following rules:

- Hidden files (i.e., prefixed with `.`) are ignored.
- Symbolic links to directories are not allowed.
- Symbolic links to files are only allowed if the symlink target is an existing file within a project-asset-version subdirectory of the registry.
- Symbolic links to files are allowed if either of the following holds:
  - The symlink target is an existing file within a project-asset-version subdirectory of the registry.
  - The symlink target is a file in the same temporary directory.

Once this directory is constructed and populated, the user should create a file with the `request-upload-` prefix.
This file should be JSON-formatted with the following properties:
Expand Down
283 changes: 207 additions & 76 deletions transfer.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,51 @@ func computeChecksum(path string) (string, error) {
return hex.EncodeToString(h.Sum(nil)), nil
}

func resolveSymlink(
// createDedupManifest builds the deduplication index for a project's asset
// from the manifest of its latest version.
//
// The returned map is keyed by "<size>-<md5sum>" of each manifest entry. Each
// value is a linkMetadata that points at the corresponding file in the latest
// version. If that file is itself a link, Ancestor is set to the link's own
// ancestor when it has one, or to the link itself otherwise.
//
// If the '..latest' file (latestFileName) does not exist inside the asset
// directory, an empty index and a nil error are returned. Any other failure
// to stat the file, read the latest version or read its manifest is returned
// as a wrapped error.
func createDedupManifest(registry, project, asset string) (map[string]linkMetadata, error) {
	// Loading the latest version's metadata into a deduplication index.
	// There's no need to check for probational versions here as only
	// non-probational versions ever make it into '..latest'.
	last_dedup := map[string]linkMetadata{}
	asset_dir := filepath.Join(registry, project, asset)
	latest_path := filepath.Join(asset_dir, latestFileName)

	if _, err := os.Stat(latest_path); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// Nothing has been recorded as latest, so there is nothing to deduplicate against.
			return last_dedup, nil
		}
		// The path is passed as an argument rather than spliced into the format
		// string, so that a '%' in the path cannot be misread as a formatting verb.
		return nil, fmt.Errorf("failed to stat '%s'; %w", latest_path, err)
	}

	latest, err := readLatest(asset_dir)
	if err != nil {
		return nil, fmt.Errorf("failed to identify the latest version; %w", err)
	}

	manifest, err := readManifest(filepath.Join(asset_dir, latest.Version))
	if err != nil {
		return nil, fmt.Errorf("failed to read the latest version's manifest; %w", err)
	}

	for k, v := range manifest {
		entry := linkMetadata{
			Project: project,
			Asset:   asset,
			Version: latest.Version,
			Path:    k,
		}
		if v.Link != nil {
			// Carry forward the link's ancestor if it has one; otherwise
			// the link target itself becomes the ancestor.
			if v.Link.Ancestor != nil {
				entry.Ancestor = v.Link.Ancestor
			} else {
				entry.Ancestor = v.Link
			}
		}
		last_dedup[strconv.FormatInt(v.Size, 10)+"-"+v.Md5sum] = entry
	}

	return last_dedup, nil
}

func resolveRegistrySymlink(
registry string,
project string,
asset string,
Expand Down Expand Up @@ -157,9 +201,9 @@ func resolveSymlink(
return &output, nil
}

func createRelativeSymlink(relative_target, relative_link, full_link string) error {
// Actually creating the link. We convert it to a relative path
// within the registry so that the registry is relocatable.
func createRegistrySymlink(relative_target, relative_link, full_link string) error {
// We convert the link target to a relative path within the registry so
// that the registry is easily relocatable.
working := relative_link
for {
working = filepath.Dir(working)
Expand All @@ -174,59 +218,98 @@ func createRelativeSymlink(relative_target, relative_link, full_link string) err

err := os.Symlink(relative_target, full_link)
if err != nil {
return fmt.Errorf("failed to create a symlink at '" + full_link + "'; %w", err)
return fmt.Errorf("failed to create a registry symlink at '" + full_link + "'; %w", err)
}
return nil
}

func Transfer(source, registry, project, asset, version string) error {
destination := filepath.Join(registry, project, asset, version)
manifest := map[string]interface{}{}
manifest_cache := map[string]map[string]manifestEntry{}
summary_cache := map[string]bool{}
// localLinkInfo describes a symlink in the upload whose target is another
// file inside the same source directory rather than a file in the registry.
type localLinkInfo struct {
// Target is the link target expressed as a path relative to the source
// directory (it is populated from filepath.Rel(source, target)).
Target string
// Final is the path at which the symlink is created; it is passed as the
// full_link argument of createLocalSymlink.
// NOTE(review): presumably the link's location inside the destination
// version directory - confirm where 'final' is computed in Transfer.
Final string
}

// Loading the latest version's metadata into a deduplication index.
// There's no need to check for probational versions here as only
// non-probational versions ever make it into '..latest'.
last_dedup := map[string]linkMetadata{}
{
asset_dir := filepath.Join(registry, project, asset)
latest_path := filepath.Join(asset_dir, latestFileName)
func resolveLocalSymlink(
project string,
asset string,
version string,
rel string,
details *localLinkInfo,
local_links map[string]localLinkInfo,
manifest map[string]manifestEntry,
traversed map[string]bool,
source string,
) (*manifestEntry, error) {
var target_deets *manifestEntry
man_deets, man_ok := manifest[details.Target]
if man_ok {
target_deets = &man_deets

} else {
_, trav_ok := traversed[rel]
if trav_ok {
return nil, fmt.Errorf("cyclic symlinks detected at '%s'", filepath.Join(source, rel))
}
traversed[rel] = false

_, err := os.Stat(latest_path)
if err == nil {
latest, err := readLatest(asset_dir)
if err != nil {
return fmt.Errorf("failed to identify the latest version; %w", err)
}
rel_deets, rel_ok := local_links[details.Target]
if !rel_ok {
return nil, fmt.Errorf("symlink at '%s' should point to a manifest file or another symlink", filepath.Join(source, rel))
}

manifest, err := readManifest(filepath.Join(asset_dir, latest.Version))
if err != nil {
return fmt.Errorf("failed to read the latest version's manifest; %w", err)
}
ancestor, err := resolveLocalSymlink(project, asset, version, details.Target, &rel_deets, local_links, manifest, traversed, source)
if err != nil {
return nil, err
}

for k, v := range manifest {
self := linkMetadata{
Project: project,
Asset: asset,
Version: latest.Version,
Path: k,
}
if v.Link != nil {
if v.Link.Ancestor != nil {
self.Ancestor = v.Link.Ancestor
} else {
self.Ancestor = v.Link
}
}
last_dedup[strconv.FormatInt(v.Size, 10) + "-" + v.Md5sum] = self
}
target_deets = ancestor
}

} else if !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("failed to stat '" + latest_path + "; %w", err)
output := manifestEntry{
Size: target_deets.Size,
Md5sum: target_deets.Md5sum,
Link: &linkMetadata{
Project: project,
Asset: asset,
Version: version,
Path: details.Target,
},
}

if target_deets.Link != nil {
if target_deets.Link.Ancestor != nil {
output.Link.Ancestor = target_deets.Link.Ancestor
} else {
output.Link.Ancestor = target_deets.Link
}
}

manifest[rel] = output
return &output, nil
}

// createLocalSymlink creates a symbolic link at full_link whose target is
// given as a relative path.
//
// relative_target and relative_link are expected to be relative to the same
// root directory. One ".." component is prepended to relative_target for every
// directory level in relative_link, so that the stored target resolves from
// the directory containing the link. full_link is the on-disk path at which
// the link is written.
//
// An error is returned if relative_link is an absolute path or if the symlink
// cannot be created; in the latter case the underlying error is wrapped.
func createLocalSymlink(relative_target, relative_link, full_link string) error {
	// filepath.Dir never reduces an absolute path to ".", so the loop below
	// would not terminate; reject such input up front.
	if filepath.IsAbs(relative_link) {
		return fmt.Errorf("expected a relative path for the link location, got '%s'", relative_link)
	}

	for working := filepath.Dir(relative_link); working != "."; working = filepath.Dir(working) {
		relative_target = filepath.Join("..", relative_target)
	}

	if err := os.Symlink(relative_target, full_link); err != nil {
		// The path is passed as an argument rather than spliced into the format
		// string, so that a '%' in the path cannot consume the wrapped error.
		return fmt.Errorf("failed to create a local symlink at '%s'; %w", full_link, err)
	}
	return nil
}

func Transfer(source, registry, project, asset, version string) error {
last_dedup, err := createDedupManifest(registry, project, asset)
if err != nil {
return err
}

// Creating a function to add the links.
links := map[string]map[string]*linkMetadata{}
addLink := func(rel string, link_info *linkMetadata) {
Expand All @@ -239,7 +322,18 @@ func Transfer(source, registry, project, asset, version string) error {
sublinks[base] = link_info
}

err := filepath.WalkDir(source, func(path string, info fs.DirEntry, err error) error {
type basicSymLink struct {
Path string
Rel string
Final string
}
more_links := []basicSymLink{}

// First pass fills the manifest with non-symlink files.
destination := filepath.Join(registry, project, asset, version)
manifest := map[string]manifestEntry{}

err = filepath.WalkDir(source, func(path string, info fs.DirEntry, err error) error {
if err != nil {
return fmt.Errorf("failed to walk into '" + path + "'; %w", err)
}
Expand Down Expand Up @@ -275,35 +369,7 @@ func Transfer(source, registry, project, asset, version string) error {

// Symlinks to files inside the registry are preserved.
if restat.Mode() & os.ModeSymlink == os.ModeSymlink {
target, err := os.Readlink(path)
if err != nil {
return fmt.Errorf("failed to read the symlink at '" + path + "'; %w", err)
}

tstat, err := os.Stat(target)
if err != nil {
return fmt.Errorf("failed to stat link target %q; %w", target, err)
}

inside, err := filepath.Rel(registry, target)
if err != nil || !filepath.IsLocal(inside) {
return fmt.Errorf("symbolic links to files outside the registry (%q) are not supported", target)
}
if tstat.IsDir() {
return fmt.Errorf("symbolic links to directories (%q) are not supported", target)
}

obj, err := resolveSymlink(registry, project, asset, version, inside, manifest_cache, summary_cache)
if err != nil {
return fmt.Errorf("failed to resolve the symlink at '" + path + "'; %w", err)
}
manifest[rel] = *obj

err = createRelativeSymlink(inside, rel, final)
if err != nil {
return fmt.Errorf("failed to create a symlink for '" + rel + "'; %w", err)
}
addLink(rel, obj.Link)
more_links = append(more_links, basicSymLink{ Path: path, Rel: rel, Final: final })
return nil
}

Expand All @@ -322,7 +388,7 @@ func Transfer(source, registry, project, asset, version string) error {
if ok {
man_entry.Link = &last_entry
manifest[rel] = man_entry
err = createRelativeSymlink(filepath.Join(last_entry.Project, last_entry.Asset, last_entry.Version, last_entry.Path), rel, final)
err = createRegistrySymlink(filepath.Join(last_entry.Project, last_entry.Asset, last_entry.Version, last_entry.Path), rel, final)
if err != nil {
return fmt.Errorf("failed to create a symlink for '" + rel + "'; %w", err)
}
Expand Down Expand Up @@ -350,6 +416,71 @@ func Transfer(source, registry, project, asset, version string) error {
return err
}

// Second pass goes through all the symlinks to existing files in the registry.
manifest_cache := map[string]map[string]manifestEntry{}
summary_cache := map[string]bool{}
local_links := map[string]localLinkInfo{}

for _, entry := range more_links {
path := entry.Path
rel := entry.Rel
final := entry.Final

target, err := os.Readlink(path)
if err != nil {
return fmt.Errorf("failed to read the symlink at '" + path + "'; %w", err)
}

if (!filepath.IsAbs(target)) {
target = filepath.Clean(filepath.Join(filepath.Dir(path), target))
}

registry_inside, err := filepath.Rel(registry, target)
if err != nil || !filepath.IsLocal(registry_inside) {
local_inside, err := filepath.Rel(source, target)
if err != nil || !filepath.IsLocal(local_inside) {
return fmt.Errorf("symbolic links to files outside the source or registry directories (%q) are not supported", target)
}
local_links[rel] = localLinkInfo{ Target: local_inside, Final: final }
continue
}

tstat, err := os.Stat(target)
if err != nil {
return fmt.Errorf("failed to stat link target %q; %w", target, err)
}
if tstat.IsDir() {
return fmt.Errorf("symbolic links to directories (%q) are not supported", target)
}

obj, err := resolveRegistrySymlink(registry, project, asset, version, registry_inside, manifest_cache, summary_cache)
if err != nil {
return fmt.Errorf("failed to resolve the symlink at '" + path + "'; %w", err)
}
manifest[rel] = *obj

err = createRegistrySymlink(registry_inside, rel, final)
if err != nil {
return fmt.Errorf("failed to create a symlink for '" + rel + "'; %w", err)
}
addLink(rel, obj.Link)
}

// Third pass to recursively resolve local symlinks.
traversed := map[string]bool{}
for rel, info := range local_links {
man, err := resolveLocalSymlink(project, asset, version, rel, &info, local_links, manifest, traversed, source)
if err != nil {
return err
}

err = createLocalSymlink(info.Target, rel, info.Final)
if err != nil {
return fmt.Errorf("failed to create a symlink for '" + rel + "'; %w", err)
}
addLink(rel, man.Link)
}

// Dumping the JSON metadata.
manifest_path := filepath.Join(destination, manifestFileName)
err = dumpJson(manifest_path, &manifest)
Expand Down
Loading
Loading