diff --git a/MODULE.bazel b/MODULE.bazel index c1b7932..11e75be 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -6,7 +6,7 @@ module( compatibility_level = 1, ) -bazel_dep(name = "bazel_skylib", version = "1.5.0") +bazel_dep(name = "bazel_skylib", version = "1.7.1") bazel_dep(name = "aspect_bazel_lib", version = "2.7.9") bazel_lib_toolchains = use_extension("@aspect_bazel_lib//lib:extensions.bzl", "toolchains") @@ -21,7 +21,7 @@ use_repo(bazel_lib_toolchains, "yq_linux_s390x") use_repo(bazel_lib_toolchains, "yq_windows_amd64") bazel_dep(name = "gazelle", version = "0.34.0", dev_dependency = True, repo_name = "bazel_gazelle") -bazel_dep(name = "bazel_skylib_gazelle_plugin", version = "1.5.0", dev_dependency = True) +bazel_dep(name = "bazel_skylib_gazelle_plugin", version = "1.7.1", dev_dependency = True) bazel_dep(name = "buildifier_prebuilt", version = "6.1.2", dev_dependency = True) bazel_dep(name = "platforms", version = "0.0.10", dev_dependency = True) bazel_dep(name = "rules_oci", version = "2.0.0-rc0", dev_dependency = True) diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index 4079645..dafe198 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -20,12 +20,12 @@ "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a", "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5", "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d", - "https://bcr.bazel.build/modules/bazel_skylib/1.4.2/MODULE.bazel": "3bd40978e7a1fac911d5989e6b09d8f64921865a45822d8b09e815eaa726a651", "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138", "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": 
"8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", - "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/source.json": "082ed5f9837901fada8c68c2f3ddc958bb22b6d654f71dd73f3df30d45d4b749", - "https://bcr.bazel.build/modules/bazel_skylib_gazelle_plugin/1.5.0/MODULE.bazel": "10757f9d22ebe137930a0a677269be86d2986e8abf6b84522d631920a7267743", - "https://bcr.bazel.build/modules/bazel_skylib_gazelle_plugin/1.5.0/source.json": "2c5fb7b2ad5e07bfcc90e1661c3703adb8069ea6b3d9121f647d4288d8b48748", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/source.json": "f121b43eeefc7c29efbd51b83d08631e2347297c95aac9764a701f2a6a2bb953", + "https://bcr.bazel.build/modules/bazel_skylib_gazelle_plugin/1.7.1/MODULE.bazel": "c76b9d256c77c31754c5ac306d395fd47946d8d7470bea2474c3add17b334c3d", + "https://bcr.bazel.build/modules/bazel_skylib_gazelle_plugin/1.7.1/source.json": "25a87991a554369633d706f924f67ca3eb4d9200af1bba7e57dceb85eb9198e4", "https://bcr.bazel.build/modules/buildifier_prebuilt/6.1.2/MODULE.bazel": "2ef4962c8b0b6d8d21928a89190755619254459bc67f870dc0ccb9ba9952d444", "https://bcr.bazel.build/modules/buildifier_prebuilt/6.1.2/source.json": "19fb45ed3f0d55cbff94e402c39512940833ae3a68f9cbfd9518a1926b609c7c", "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", @@ -813,7 +813,7 @@ "@@gazelle~//:extensions.bzl%go_deps": { "general": { "bzlTransitiveDigest": "Taobh9Bi1JpF4jHwuw6x9ceWDpHtCjGmS8VXbxLOqH8=", - "usagesDigest": "OF7bvO+xWblkUXgQsIPSIr048t6z7ZKSjlR8twO/efg=", + "usagesDigest": "dKNKvQJECrWkRG++E5OdDZZAclQbs9RKSkeI+WU2EpA=", "recordedFileInputs": { "@@rules_go~//go.mod": "a7143f329c2a3e0b983ce74a96c0c25b0d0c59d236d75f7e1b069aadd988d55e", "@@gazelle~//go.sum": "7469786f3930030c430969cedae951e6947cb40f4a563dac94a350659c0fedc4", @@ -1688,7 +1688,7 
@@ }, "@@rules_oci~//oci:extensions.bzl%oci": { "general": { - "bzlTransitiveDigest": "6G6tDFJTPCtKyxon8Br4ev91dRdgBbCkorJmAgiIagc=", + "bzlTransitiveDigest": "3HRH6B82zu14f5XyCoQciUqaObjafWuRersO7BtZxGU=", "usagesDigest": "Pu/P+SVB+Qbdzl3wU3n5aOMsoK6nTX8UJYRkP9qCPpE=", "recordedFileInputs": {}, "recordedDirentsInputs": {}, diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index ac68fbb..6afc903 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -36,6 +36,28 @@ load("@bullseye//:packages.bzl", "bullseye_packages") bullseye_packages() +# bazel run @bullseye_rproject//:lock +deb_index( + name = "bullseye_rproject", + lock = "//examples/debian_flat_repo:bullseye_rproject.lock.json", + manifest = "//examples/debian_flat_repo:bullseye_rproject.yaml", +) + +load("@bullseye_rproject//:packages.bzl", "bullseye_rproject_packages") + +bullseye_rproject_packages() + +# bazel run @nvidia_ubuntu2404_cuda//:lock +deb_index( + name = "nvidia_ubuntu2404_cuda", + lock = "//examples/debian_flat_repo:nvidia_ubuntu2404_cuda.lock.json", + manifest = "//examples/debian_flat_repo:nvidia_ubuntu2404_cuda.yaml", +) + +load("@nvidia_ubuntu2404_cuda//:packages.bzl", "nvidia_ubuntu2404_cuda_packages") + +nvidia_ubuntu2404_cuda_packages() + # bazel run @apt_security//:lock deb_index( name = "apt_security", diff --git a/apt/private/BUILD.bazel b/apt/private/BUILD.bazel index c0f7597..3cee180 100644 --- a/apt/private/BUILD.bazel +++ b/apt/private/BUILD.bazel @@ -42,7 +42,10 @@ bzl_library( name = "package_index", srcs = ["package_index.bzl"], visibility = ["//apt:__subpackages__"], - deps = [":util"], + deps = [ + ":util", + "@bazel_skylib//lib:paths", + ], ) bzl_library( diff --git a/apt/private/lockfile.bzl b/apt/private/lockfile.bzl index 09e2fd9..03fab30 100644 --- a/apt/private/lockfile.bzl +++ b/apt/private/lockfile.bzl @@ -20,7 +20,7 @@ def _add_package(lock, package, arch): "key": k, "name": package["Package"], "version": package["Version"], - "url": "%s/%s" % (package["Root"], 
package["Filename"]), + "url": package["FileUrl"], "sha256": package["SHA256"], "arch": arch, "dependencies": [], diff --git a/apt/private/package_index.bzl b/apt/private/package_index.bzl index d498767..7e829fb 100644 --- a/apt/private/package_index.bzl +++ b/apt/private/package_index.bzl @@ -1,9 +1,10 @@ "package index" +load("@bazel_skylib//lib:paths.bzl", "paths") load(":util.bzl", "util") -def _fetch_package_index(rctx, url, dist, comp, arch, integrity): - target_triple = "{dist}/{comp}/{arch}".format(dist = dist, comp = comp, arch = arch) +def _fetch_package_index(rctx, url, arch, dist = None, comp = None, directory = None): + # TODO: validate mutually exclusive args (dist, comp) VS directory # See https://linux.die.net/man/1/xz and https://linux.die.net/man/1/gzip # --keep -> keep the original file (Bazel might be still committing the output to the cache) @@ -16,23 +17,53 @@ def _fetch_package_index(rctx, url, dist, comp, arch, integrity): failed_attempts = [] - for (ext, cmd) in supported_extensions.items(): - output = "{}/Packages.{}".format(target_triple, ext) - dist_url = "{}/dists/{}/{}/binary-{}/Packages.{}".format(url, dist, comp, arch, ext) + for ext, cmd in supported_extensions.items(): + index = "Packages" + index_full = "{}.{}".format(index, ext) + + if directory == None: # canonical repo + output = "{dist}/{comp}/{arch}/{index}".format( + dist = dist, + comp = comp, + arch = arch, + index = index, + ) + + index_url = "{url}/dists/{dist}/{comp}/binary-{arch}/{index_full}".format( + url = url, + dist = dist, + comp = comp, + arch = arch, + index_full = index_full, + ) + else: # flat repo + output = "{directory}/{arch}/{index}".format( + directory = directory, + arch = arch, + index = index, + ) + + index_url = "{url}/{directory}/{index_full}".format( + url = url, + directory = directory, + index_full = index_full, + ) + + output_full = "{}.{}".format(output, ext) + download = rctx.download( - url = dist_url, - output = output, - integrity = 
integrity, + url = index_url, + output = output_full, allow_fail = True, ) decompress_r = None if download.success: - decompress_r = rctx.execute(cmd + [output]) + decompress_r = rctx.execute(cmd + [output_full]) if decompress_r.return_code == 0: integrity = download.integrity break - failed_attempts.append((dist_url, download, decompress_r)) + failed_attempts.append((index_url, download, decompress_r)) if len(failed_attempts) == len(supported_extensions): attempt_messages = [] @@ -51,11 +82,67 @@ def _fetch_package_index(rctx, url, dist, comp, arch, integrity): {} """.format(len(failed_attempts), "\n".join(attempt_messages))) - return ("{}/Packages".format(target_triple), integrity) + return (output, integrity) + +def _parse_url(url): + scheme = "" + host = "" + path = "/" + + if "://" not in url: + fail("Invalid URL: %s" % url) + + scheme, url_ = url.split("://", 1) + + if "/" in url_: + host, path_ = url_.split("/", 1) + path += path_ + else: + host = url_ + + return struct(scheme = scheme, host = host, path = path) + +def _make_file_url(pkg, root_url_, directory = None): + root_url = _parse_url(root_url_) + + filename = pkg["Filename"] + + invalid_filename = not paths.is_normalized( + filename, + look_for_same_level_references = True, + ) + + if invalid_filename: + # NOTE: + # Although the Debian repo spec for 'Filename' (see + # https://wiki.debian.org/DebianRepository/Format#Filename) clearly + # says that 'Filename' should be relative to the base directory of the + # repo and should be in canonical form (i.e. without '.' or '..') there + # are cases where this is not honored. + # + # In those cases we try to work around this by assuming 'Filename' is + # relative to the sources.list directory/ so we combine them and + # normalize the new 'Filename' path. + # + # Note that, so far, only the NVIDIA CUDA repos needed this workaround + # so maybe this heuristic will break for other repos that don't conform + # to the Debian repo spec. 
+ filename = paths.normalize(paths.join(directory or "", filename)) + + file_url = "{}://{}{}".format( + root_url.scheme, + root_url.host, + paths.join(root_url.path, filename), + ) -def _parse_package_index(state, contents, arch, root): + return file_url, invalid_filename + +def _parse_package_index(state, contents, arch, root_url, directory = None): last_key = "" pkg = {} + total_pkgs = 0 + out_of_spec = [] + for group in contents.split("\n\n"): for line in group.split("\n"): if line.strip() == "": @@ -82,10 +169,20 @@ def _parse_package_index(state, contents, arch, root): pkg[key] = value if len(pkg.keys()) != 0: - pkg["Root"] = root - util.set_dict(state.packages, value = pkg, keys = (arch, pkg["Package"], pkg["Version"])) + pkg["FileUrl"], invalid_filename = _make_file_url(pkg, root_url, directory) + + if invalid_filename: + out_of_spec.append(pkg["Package"]) + + # NOTE: this fixes the arch for multi-arch flat repos + arch_ = arch if pkg["Architecture"] == "all" else pkg["Architecture"] + + util.set_dict(state.packages, value = pkg, keys = (arch_, pkg["Package"], pkg["Version"])) last_key = "" pkg = {} + total_pkgs += 1 + + return out_of_spec, total_pkgs def _package_versions(state, name, arch): if name not in state.packages[arch]: @@ -105,7 +202,16 @@ def _create(rctx, sources, archs): ) for arch in archs: - for (url, dist, comp) in sources: + for source in sources: + if len(source) == 2: # flat repo + url, directory = source + index = directory + dist, comp = None, None + else: + url, dist, comp = source + index = "%s/%s" % (dist, comp) + directory = None + # We assume that `url` does not contain a trailing forward slash when passing to # functions below. If one is present, remove it. 
Some HTTP servers do not handle # redirects properly when a path contains "//" @@ -113,12 +219,32 @@ def _create(rctx, sources, archs): # on misconfigured HTTP servers) url = url.rstrip("/") - rctx.report_progress("Fetching package index: {}/{} for {}".format(dist, comp, arch)) - (output, _) = _fetch_package_index(rctx, url, dist, comp, arch, "") + rctx.report_progress("Fetching %s package index: %s" % (arch, index)) + output, _ = _fetch_package_index( + rctx, + url, + arch, + dist = dist, + comp = comp, + directory = directory, + ) + + rctx.report_progress("Parsing %s package index: %s" % (arch, index)) # TODO: this is expensive to perform. - rctx.report_progress("Parsing package index: {}/{} for {}".format(dist, comp, arch)) - _parse_package_index(state, rctx.read(output), arch, url) + out_of_spec, total_pkgs = _parse_package_index( + state, + rctx.read(output), + arch, + url, + directory, + ) + + if out_of_spec: + count = len(out_of_spec) + pct = int(100.0 * count / total_pkgs) + msg = "Warning: {} index {} has {} packages ({}%) with invalid 'Filename' fields" + print(msg.format(arch, index, count, pct)) return struct( package_versions = lambda **kwargs: _package_versions(state, **kwargs), diff --git a/apt/private/resolve.bzl b/apt/private/resolve.bzl index 7fa93b1..1bd4b39 100644 --- a/apt/private/resolve.bzl +++ b/apt/private/resolve.bzl @@ -40,13 +40,27 @@ def internal_resolve(rctx, yq_toolchain_prefix, manifest, include_transitive): sources = [] for src in manifest["sources"]: - distr, components = src["channel"].split(" ", 1) - for comp in components.split(" "): - sources.append(( - src["url"], - distr, - comp, - )) + channel_chunks = src["channel"].split(" ") + + if len(channel_chunks) == 1: + # it's a flat repo, see: + # https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format + # vs the "canonical" repo: + # https://wiki.debian.org/DebianRepository/Format#Overview + directory = channel_chunks[0] + + if not directory.endswith("/"): + 
fail("Debian flat repo directory must end in '/'") + + sources.append((src["url"], directory.rstrip("/"))) + else: + distr, components = channel_chunks[0], channel_chunks[1:] + + if distr.endswith("/"): + fail("Debian distribution ends in '/' but this is not a flat repo") + + for comp in components: + sources.append((src["url"], distr, comp)) pkgindex = package_index.new(rctx, sources = sources, archs = manifest["archs"]) pkgresolution = package_resolution.new(index = pkgindex) diff --git a/examples/debian_flat_repo/BUILD.bazel b/examples/debian_flat_repo/BUILD.bazel new file mode 100644 index 0000000..f536061 --- /dev/null +++ b/examples/debian_flat_repo/BUILD.bazel @@ -0,0 +1,76 @@ +load("@container_structure_test//:defs.bzl", "container_structure_test") +load("@rules_distroless//apt:defs.bzl", "dpkg_status") +load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load") + +PACKAGES = [ + "@bullseye//dpkg", + "@bullseye//apt", +] + +PACKAGES_AMD64 = PACKAGES + [ + "@bullseye_rproject//r-mathlib", + "@nvidia_ubuntu2404_cuda//nvidia-container-toolkit-base", +] + +PACKAGES_ARM64 = PACKAGES + [ + "@nvidia_ubuntu2404_cuda//nvidia-container-toolkit-base", +] + +# Creates /var/lib/dpkg/status with installed package information. 
+dpkg_status( + name = "dpkg_status", + controls = select({ + "@platforms//cpu:x86_64": [ + "%s/amd64:control" % package + for package in PACKAGES_AMD64 + ], + "@platforms//cpu:arm64": [ + "%s/arm64:control" % package + for package in PACKAGES_ARM64 + ], + }), +) + +oci_image( + name = "apt", + architecture = select({ + "@platforms//cpu:x86_64": "amd64", + "@platforms//cpu:arm64": "arm64", + }), + os = "linux", + tars = [ + ":dpkg_status", + ] + select({ + "@platforms//cpu:x86_64": [ + "%s/amd64" % package + for package in PACKAGES_AMD64 + ], + "@platforms//cpu:arm64": [ + "%s/arm64" % package + for package in PACKAGES_ARM64 + ], + }), +) + +oci_load( + name = "tarball", + image = ":apt", + repo_tags = [ + "distroless/test:latest", + ], +) + +container_structure_test( + name = "test", + configs = select({ + "@platforms//cpu:x86_64": ["test_linux_amd64.yaml"], + "@platforms//cpu:arm64": ["test_linux_arm64.yaml"], + }), + image = ":apt", + target_compatible_with = select({ + "@platforms//cpu:x86_64": ["@platforms//cpu:x86_64"], + "@platforms//cpu:arm64": ["@platforms//cpu:arm64"], + }) + [ + "@platforms//os:linux", + ], +) diff --git a/examples/debian_flat_repo/bullseye_rproject.lock.json b/examples/debian_flat_repo/bullseye_rproject.lock.json new file mode 100644 index 0000000..90eb9d8 --- /dev/null +++ b/examples/debian_flat_repo/bullseye_rproject.lock.json @@ -0,0 +1,14 @@ +{ + "packages": [ + { + "arch": "amd64", + "dependencies": [], + "key": "r-mathlib_4.4.1-1_bullseyecran.0_amd64", + "name": "r-mathlib", + "sha256": "cbe3abbcc74261f2ad84159b423b856c1a0b4ebe6fef2de763d8783ff00245d5", + "url": "https://cloud.r-project.org/bin/linux/debian/bullseye-cran40/r-mathlib_4.4.1-1~bullseyecran.0_amd64.deb", + "version": "4.4.1-1~bullseyecran.0" + } + ], + "version": 1 +} \ No newline at end of file diff --git a/examples/debian_flat_repo/bullseye_rproject.yaml b/examples/debian_flat_repo/bullseye_rproject.yaml new file mode 100644 index 0000000..6c1cd90 --- /dev/null +++ 
b/examples/debian_flat_repo/bullseye_rproject.yaml @@ -0,0 +1,20 @@ +# Packages for examples/debian_flat_repo. +# +# Anytime this file is changed, the lockfile needs to be regenerated. +# +# To generate the bullseye_rproject.lock.json run the following command +# +# bazel run @bullseye_rproject//:lock +# +# See debian_package_index at WORKSPACE.bazel +version: 1 + +sources: + - channel: bullseye-cran40/ + url: https://cloud.r-project.org/bin/linux/debian + +archs: + - amd64 + +packages: + - r-mathlib diff --git a/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.lock.json b/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.lock.json new file mode 100644 index 0000000..8a9d660 --- /dev/null +++ b/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.lock.json @@ -0,0 +1,23 @@ +{ + "packages": [ + { + "arch": "amd64", + "dependencies": [], + "key": "nvidia-container-toolkit-base_1.16.1-1_amd64", + "name": "nvidia-container-toolkit-base", + "sha256": "8184d04f88215de4f630e4f5ba24d9bf7e64a7a597ba2e3c6fbd94f86bea0599", + "url": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/nvidia-container-toolkit-base_1.16.1-1_amd64.deb", + "version": "1.16.1-1" + }, + { + "arch": "arm64", + "dependencies": [], + "key": "nvidia-container-toolkit-base_1.16.1-1_arm64", + "name": "nvidia-container-toolkit-base", + "sha256": "dfc068e5ff69274351e59376078d9bda6a6c95423c7de1619b6a54aa9ba0f494", + "url": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/arm64/nvidia-container-toolkit-base_1.16.1-1_arm64.deb", + "version": "1.16.1-1" + } + ], + "version": 1 +} \ No newline at end of file diff --git a/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.yaml b/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.yaml new file mode 100644 index 0000000..9d1fd8c --- /dev/null +++ b/examples/debian_flat_repo/nvidia_ubuntu2404_cuda.yaml @@ -0,0 +1,23 @@ +# Packages for examples/debian_flat_repo. 
+# +# Anytime this file is changed, the lockfile needs to be regenerated. +# +# To generate the nvidia_ubuntu2404_cuda.lock.json run the following command +# +# bazel run @nvidia_ubuntu2404_cuda//:lock +# +# See debian_package_index at WORKSPACE.bazel +version: 1 + +sources: + - channel: ubuntu2404/x86_64/ + url: https://developer.download.nvidia.com/compute/cuda/repos + - channel: ubuntu2404/arm64/ + url: https://developer.download.nvidia.com/compute/cuda/repos + +archs: + - amd64 + - arm64 + +packages: + - nvidia-container-toolkit-base diff --git a/examples/debian_flat_repo/test_linux_amd64.yaml b/examples/debian_flat_repo/test_linux_amd64.yaml new file mode 100644 index 0000000..95c703c --- /dev/null +++ b/examples/debian_flat_repo/test_linux_amd64.yaml @@ -0,0 +1,10 @@ +schemaVersion: "2.0.0" + +commandTests: + - name: "apt list --installed" + command: "apt" + args: ["list", "--installed"] + expectedOutput: + - Listing\.\.\. + - r-mathlib/now 4.4.1-1~bullseyecran.0 amd64 \[installed,local\] + - nvidia-container-toolkit-base/now 1.16.1-1 amd64 \[installed,local\] diff --git a/examples/debian_flat_repo/test_linux_arm64.yaml b/examples/debian_flat_repo/test_linux_arm64.yaml new file mode 100644 index 0000000..7af4d70 --- /dev/null +++ b/examples/debian_flat_repo/test_linux_arm64.yaml @@ -0,0 +1,9 @@ +schemaVersion: "2.0.0" + +commandTests: + - name: "apt list --installed" + command: "apt" + args: ["list", "--installed"] + expectedOutput: + - Listing\.\.\. + - nvidia-container-toolkit-base/now 1.16.1-1 arm64 \[installed,local\]