From 6cd972994ddd6cca1376260dbd2675eb77187ae8 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Sat, 29 Jun 2024 19:34:36 +0000 Subject: [PATCH] Add support for hashed mode to Poudriere Creates the repo with hash-based filenames to allow use of a CDN Setting `PKG_HASH="yes"` in poudriere.conf will build a repo where all of the packages are in All/Hashed/ and the repo manifest points there. It also creates a set of symlinks in the All/ directory, but these are purely for poudriere itself, to find dependencies. The symlinks should NOT be published, only the Hashed/ directory is required. This mechanism ensures that the package files themselves can be cached by a CDN as the filename will change if the contents ever differ. The repo metadata files (those outside of All/) should be set to have a very low cache expiration, so that when a new package set is published they are updated and reflect the new packages. Technically this feature also allows "previous" versions of packages to continue to be available via the CDN, but that is a side-effect not a purposeful feature. Sponsored-by: Klara, Inc. Sponsored-by: TitanHQ --- src/etc/poudriere.conf.sample | 13 +++++++++++++ src/man/poudriere-bulk.8 | 10 ++++++---- src/share/poudriere/bulk.sh | 7 ++++++- src/share/poudriere/common.sh | 26 +++++++++++++++++++++++++- src/share/poudriere/include/pkg.sh | 6 ++++++ src/share/poudriere/pkgclean.sh | 4 ++++ 6 files changed, 60 insertions(+), 6 deletions(-) mode change 100755 => 100644 src/share/poudriere/pkgclean.sh diff --git a/src/etc/poudriere.conf.sample b/src/etc/poudriere.conf.sample index c0ba896d92..337eb18279 100644 --- a/src/etc/poudriere.conf.sample +++ b/src/etc/poudriere.conf.sample @@ -383,3 +383,16 @@ DISTFILES_CACHE=/usr/ports/distfiles # be fetched. 
# Default: everything #PACKAGE_FETCH_WHITELIST="gcc* rust llvm*" + +# Have pkg create the repo such that each package is named with the short hash +# of its file contents in the package filename, with symlinks to the traditional +# package filenames. The packagesite.yaml file will point to the hashed version +# of these files. By using hashed pkg filenames, this allows users to lazily +# cache packages without conflicting with the existing packages, or serving stale +# packages from a cache. Once the packages are synced the much +# smaller meta files can then be synced, allowing a near-atomic update of the repo. +# On caching CDNs this means a need to purge 2-5 files instead of all pkgs that +# have been updated. +# The symlinks are only required for the local poudriere for resolving dependencies, +# they do not need to be uploaded to the CDN. +#PKG_HASH="no" diff --git a/src/man/poudriere-bulk.8 b/src/man/poudriere-bulk.8 index 81cf698eeb..dac4eb3211 100644 --- a/src/man/poudriere-bulk.8 +++ b/src/man/poudriere-bulk.8 @@ -28,7 +28,7 @@ .\" .\" Note: The date here should be updated whenever a non-trivial .\" change is made to the manual page. -.Dd July 5, 2022 +.Dd September 26, 2022 .Dt POUDRIERE-BULK 8 .Os .Sh NAME @@ -38,7 +38,7 @@ .Nm .Fl a .Fl j Ar name -.Op Fl CcFIikNnRrSTtvw +.Op Fl CcFHIikNnRrSTtvw .Op Fl B Ar name .Op Fl b Ar branch .Op Fl J Ar maxjobs Ns Op Cm \&: Ns Ar prebuildmaxjobs @@ -48,7 +48,7 @@ .Nm .Fl f Ar file Op Fl f Ar file2 Ar ... .Fl j Ar name -.Op Fl CcFIikNnRrSTtvw +.Op Fl CcFHIikNnRrSTtvw .Op Fl B Ar name .Op Fl b Ar branch .Op Fl J Ar maxjobs Ns Op Cm \&: Ns Ar prebuildmaxjobs @@ -57,7 +57,7 @@ .Op Fl z Ar set .Nm .Fl j Ar name -.Op Fl CcFIikNnRrSTtvw +.Op Fl CcFHIikNnRrSTtvw .Op Fl B Ar name .Op Fl b Ar branch .Op Fl J Ar maxjobs Ns Op Cm \&: Ns Ar prebuildmaxjobs @@ -232,6 +232,8 @@ Fetch only from the original Skip .Fx mirrors. +.It Fl H +Create a repository where the package filenames contain the short hash of the contents. 
.It Fl I Advanced interactive mode. .Pp diff --git a/src/share/poudriere/bulk.sh b/src/share/poudriere/bulk.sh index 3507ceb354..126b3becc6 100755 --- a/src/share/poudriere/bulk.sh +++ b/src/share/poudriere/bulk.sh @@ -47,6 +47,8 @@ Options: -f file. Implies -c for -a. -c -- Clean all the previously built binary packages and logs. -F -- Only fetch from original master_site (skip FreeBSD mirrors) + -H -- Create a repository where the package filenames contain the + short hash of the contents. -I -- Advanced Interactive mode. Leaves jail running with ports installed after test. -i -- Interactive mode. Enter jail for interactive testing and @@ -97,7 +99,7 @@ if [ $# -eq 0 ]; then usage fi -while getopts "ab:B:CcFf:iIj:J:knNO:p:RrSTtvwz:" FLAG; do +while getopts "ab:B:CcFf:HiIj:J:knNO:p:RrSTtvwz:" FLAG; do case "${FLAG}" in a) ALL=1 @@ -126,6 +128,9 @@ while getopts "ab:B:CcFf:iIj:J:knNO:p:RrSTtvwz:" FLAG; do fi LISTPKGS="${LISTPKGS:+${LISTPKGS} }${OPTARG}" ;; + H) + PKG_REPO_FLAGS="${PKG_REPO_FLAGS:+${PKG_REPO_FLAGS} }--hash --symlink" + ;; I) INTERACTIVE_MODE=2 ;; diff --git a/src/share/poudriere/common.sh b/src/share/poudriere/common.sh index a4907362c7..e52cfb432e 100755 --- a/src/share/poudriere/common.sh +++ b/src/share/poudriere/common.sh @@ -6473,6 +6473,10 @@ delete_old_pkg() { if [ -L "${pkg}" ]; then is_sym=1 fi + if [ -d "${pkg}" ] && [ "${pkgfile}" = "Hashed" ]; then + msg_debug "Ignoring directory" + return 0; + fi if [ "${is_sym}" -eq 1 ] && [ ! 
-e "${pkg}" ]; then msg "Deleting ${COLOR_PORT}${pkgfile}${COLOR_RESET}: dead symlink" delete_pkg "${pkg}" @@ -9468,12 +9472,16 @@ clean_restricted() { } build_repo() { - local origin pkg_repo_list_files + local origin pkg_repo_list_files hashcmd msg "Creating pkg repository" if [ ${DRY_RUN} -eq 1 ]; then return 0 fi + if [ ${PKG_HASH} != "no" ]; then + hashcmd="--hash --symlink" + PKG_REPO_FLAGS="${PKG_REPO_FLAGS:+${PKG_REPO_FLAGS} }$hashcmd" + fi bset status "pkgrepo:" ensure_pkg_installed force_extract || \ err 1 "Unable to extract pkg." @@ -9493,12 +9501,20 @@ build_repo() { install -m 0400 "${PKG_REPO_META_FILE}" \ "${MASTERMNT:?}/tmp/pkgmeta" fi + + # Remount rw + # mount_nullfs does not support mount -u + umount ${UMOUNT_NONBUSY} ${MASTERMNT}/packages || \ + umount -f ${MASTERMNT}/packages + mount_packages + mkdir -p ${MASTERMNT}/tmp/packages if [ -n "${PKG_REPO_SIGNING_KEY}" ]; then msg "Signing repository with key: ${PKG_REPO_SIGNING_KEY}" install -m 0400 "${PKG_REPO_SIGNING_KEY}" \ "${MASTERMNT:?}/tmp/repo.key" injail ${PKG_BIN:?} repo \ + ${PKG_REPO_FLAGS} \ ${pkg_repo_list_files:+"${pkg_repo_list_files}"} \ -o /tmp/packages \ ${PKG_META} \ @@ -9515,6 +9531,7 @@ build_repo() { # using SSH with DNSSEC as older hosts don't support # it. 
${MASTERMNT:?}${PKG_BIN:?} repo \ + ${PKG_REPO_FLAGS} \ ${pkg_repo_list_files:+"${pkg_repo_list_files}"} \ -o "${MASTERMNT:?}/tmp/packages" ${PKG_META_MASTERMNT} \ "${MASTERMNT:?}/packages" \ @@ -9527,6 +9544,7 @@ build_repo() { ;; esac JNETNAME="n" injail ${PKG_BIN:?} repo \ + ${PKG_REPO_FLAGS} \ ${pkg_repo_list_files:+"${pkg_repo_list_files}"} \ -o /tmp/packages ${PKG_META} /packages \ ${SIGNING_COMMAND:+signing_command: ${SIGNING_COMMAND}} || @@ -9542,6 +9560,11 @@ build_repo() { sign_pkg pubkey "${PACKAGES:?}/Latest/pkg.${PKG_EXT}" fi fi + + # Remount ro + umount ${UMOUNT_NONBUSY} ${MASTERMNT}/packages || \ + umount -f ${MASTERMNT}/packages + mount_packages -o ro } calculate_size_in_mb() { @@ -10109,6 +10132,7 @@ esac : ${FLAVOR_DEFAULT_ALL:=no} : ${NULLFS_PATHS:="/rescue /usr/share /usr/tests /usr/lib32"} : ${PACKAGE_FETCH_URL:="pkg+http://pkg.FreeBSD.org/\${ABI}"} +: ${PKG_HASH:=no} : ${POUDRIERE_TMPDIR:=$(command mktemp -dt poudriere)} : ${SHASH_VAR_PATH_DEFAULT:=${POUDRIERE_TMPDIR}} diff --git a/src/share/poudriere/include/pkg.sh b/src/share/poudriere/include/pkg.sh index d4821400cb..360518520d 100644 --- a/src/share/poudriere/include/pkg.sh +++ b/src/share/poudriere/include/pkg.sh @@ -400,6 +400,10 @@ delete_pkg() { local pkg="$1" clear_pkg_cache "${pkg}" + + # If ${pkg} is a symlink, delete the target as well + [ -L "${pkg}" ] && unlink $(realpath "${pkg}") + # Delete the package and the depsfile since this package is being deleted, # which will force it to be recreated unlink "${pkg}" @@ -417,6 +421,8 @@ delete_pkg_xargs() { # Delete the package and the depsfile since this package is being deleted, # which will force it to be recreated { + # If ${pkg} is a symlink, delete the target as well + [ -L "${pkg}" ] && echo $(realpath "${pkg}") echo "${pkg}" echo "${pkg_cache_dir}" } >> "${listfile}" diff --git a/src/share/poudriere/pkgclean.sh b/src/share/poudriere/pkgclean.sh old mode 100755 new mode 100644 index fafc3c0a54..5458d3d04d --- 
a/src/share/poudriere/pkgclean.sh +++ b/src/share/poudriere/pkgclean.sh @@ -289,6 +289,8 @@ check_should_delete_pkg() { *".${PKG_EXT}") if should_delete "${file}"; then echo "${file}" >> "${BADFILES_LIST:?}" + # If the pkg is a symlink to a hashed package, remove the hashed version as well + [ -L "${file}" ] && echo "$(realpath ${file})" >> ${BADFILES_LIST} fi ;; *.txz) @@ -305,6 +307,8 @@ check_should_delete_pkg() { *) msg_verbose "Found incorrect format file: ${file}" echo "${file}" >> "${BADFILES_LIST:?}" + # If the pkg is a symlink to a hashed package, remove the hashed version as well + [ -L "${file}" ] && echo "$(realpath ${file})" >> ${BADFILES_LIST} ;; esac }