From 0a9ca9f409f3c2ba9338175d728e15aed6abbe2c Mon Sep 17 00:00:00 2001 From: Dimi Kot Date: Sun, 15 Dec 2024 02:14:59 -0800 Subject: [PATCH] Tweak gc to keep up to --storage-keep-hint-slots=5 newest slots with unique hints, even if those slots are old (#34) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## PRs in the Stack - ➡ #34 (The stack is managed by [git-grok](https://github.com/dimikot/git-grok).) --- README.md | 9 +++- action.yml | 7 ++- ci-storage | 53 ++++++++++++++++--- ...ps-newest-slot-per-each-hint-on-gc.test.sh | 51 ++++++++++++++++++ 4 files changed, 110 insertions(+), 10 deletions(-) create mode 100755 tests/0070-keeps-newest-slot-per-each-hint-on-gc.test.sh diff --git a/README.md b/README.md index 2ae0094..cd8025c 100644 --- a/README.md +++ b/README.md @@ -60,10 +60,17 @@ ones, so rsync can run efficiently. # Default: /mnt storage-dir: '' - # Remove slots created earlier than this many seconds ago. + # Remove slots created earlier than this many seconds ago. The exception is + # the newest slot (it's always kept), and also up to --storage-keep-hint-slots + # slots related to unique hints. # Default: 14400 (4 hours). storage-max-age-sec: '' + # Defines the number of unique hints, for which ci-storage will keep at + # least one newest slot, even if it is past --storage-max-age-sec. + # Default: 5. + storage-keep-hint-slots: '' + # Id of the slot to store to or load from. Use "*" to load a smart-random # slot (e.g. most recent or best in terms of layer compatibility) and skip # if it does not exist. diff --git a/action.yml b/action.yml index fbd6c66..50fe12d 100644 --- a/action.yml +++ b/action.yml @@ -14,7 +14,10 @@ inputs: description: "Storage directory on the remote host. Notice that, when building the final directory on the storage host, owner and repo are always appended, so the path will be {storage-dir}/{owner}/{repo}/{slug(local-dir)} or {storage-dir}/{owner}/{repo}/{slug(local-dir)}.{layer-name}. 
Default: /mnt" required: false storage-max-age-sec: - description: "Remove slots created earlier than this many seconds ago. Default: 14400 (4 hours)." + description: "Remove slots created earlier than this many seconds ago. The exception is the newest slot (it's always kept), and also up to --storage-keep-hint-slots slots related to unique hints. Default: 3600 (1 hour)." + required: false + storage-keep-hint-slots: + description: "Defines the number of unique hints, for which ci-storage will keep at least one newest slot, even if it is past --storage-max-age-sec. Default: 5." required: false slot-id: description: 'Id of the slot to store to or load from; use "*" to load a smart-random slot (e.g. most recent or best in terms of layer compatibility) and skip if it does not exist. Default: $GITHUB_RUN_ID (which is friendly to "Re-run failed jobs").' @@ -58,6 +61,7 @@ runs: storage_host="${{ inputs.storage-host || '' }}" storage_dir="${{ inputs.storage-dir || '/mnt' }}/${{ github.repository }}" storage_max_age_sec="${{ inputs.storage-max-age-sec || '' }}" + storage_keep_hint_slots="${{ inputs.storage-keep-hint-slots || '' }}" slot_id="${{ inputs.slot-id }}" local_dir="${{ inputs.local-dir || '.' 
}}" hint="${{ inputs.hint || '' }}" @@ -103,6 +107,7 @@ runs: --storage-host="$storage_host" --storage-dir="$storage_dir" --storage-max-age-sec="$storage_max_age_sec" + --storage-keep-hint-slots="$storage_keep_hint_slots" --slot-id="$slot_id" --local-dir="$local_dir" --hint="$hint" diff --git a/ci-storage b/ci-storage index b1156b0..58fa585 100755 --- a/ci-storage +++ b/ci-storage @@ -17,6 +17,7 @@ import typing STORAGE_MAX_AGE_SEC_DEFAULT = 3600 STORAGE_MAX_AGE_SEC_BAK = 60 +STORAGE_KEEP_HINT_SLOTS_DEFAULT = 5 STORAGE_DIR_DEFAULT = "~/ci-storage" META_FILE = ".ci-storage.meta" EMPTY_DIR = ".ci-storage.empty-dir" @@ -76,7 +77,14 @@ def main(): type=str, default=str(STORAGE_MAX_AGE_SEC_DEFAULT), required=False, - help="Remove slots created earlier than this many seconds ago.", + help="Remove slots created earlier than this many seconds ago. The exception is the newest slot (it's always kept), and also up to --storage-keep-hint-slots slots related to unique hints.", + ) + parser.add_argument( + "--storage-keep-hint-slots", + type=str, + default=str(STORAGE_KEEP_HINT_SLOTS_DEFAULT), + required=False, + help="Defines the number of unique hints, for which ci-storage will keep at least one newest slot, even if is past --storage-max-age-sec.", ) parser.add_argument( "--slot-id", @@ -131,6 +139,9 @@ def main(): storage_max_age_sec: int = int( args.storage_max_age_sec or str(STORAGE_MAX_AGE_SEC_DEFAULT) ) + storage_keep_hint_slots: int = int( + args.storage_keep_hint_slots or str(STORAGE_KEEP_HINT_SLOTS_DEFAULT) + ) slot_ids: list[str] = " ".join(args.slot_id).split() local_dir: str = re.sub(r"/+$", "", args.local_dir) hints: list[str] = [ @@ -175,6 +186,7 @@ def main(): storage_host=storage_host, storage_dir=storage_dir, storage_max_age_sec=storage_max_age_sec, + storage_keep_hint_slots=storage_keep_hint_slots, ) elif action == "load": if not slot_ids: @@ -432,12 +444,13 @@ def action_maintenance( storage_host: str | None, storage_dir: str, storage_max_age_sec: int, + 
storage_keep_hint_slots: int, ): print( check_output_script( host=storage_host, script=SCRIPTS["MAINTENANCE"], - args=[storage_dir, str(storage_max_age_sec)], + args=[storage_dir, str(storage_max_age_sec), str(storage_keep_hint_slots)], indent=True, ), end="", @@ -933,7 +946,8 @@ SLOT_INFOS = textwrap.dedent( age_sec => time() - $inode_ctime, dir => $dir, meta => $meta, - is_garbage => $slot_id =~ /\./ ? 1 : 0, + meta_hints => $meta =~ /^hints=(.*)/m ? [grep(/./s, split(/\s+/s, $1))] : [], + is_tmp_or_bak => $slot_id =~ /\./ ? 1 : 0, is_bak => $slot_id =~ /\.bak\.\d+$/s ? 1 : 0, }; } else { @@ -969,7 +983,7 @@ SCRIPTS = { } %(SLOT_INFOS)s my @slot_infos = - grep { !$_->{is_garbage} } + grep { !$_->{is_tmp_or_bak} } slot_infos($storage_dir); if (@slot_infos) { my $newest_dir = $slot_infos[0]{dir}; @@ -1028,6 +1042,7 @@ SCRIPTS = { *STDERR->autoflush(1); my $storage_dir = $ARGV[0] or die("storage_dir argument required\n"); my $storage_max_age_sec = $ARGV[1] or die("storage_max_age_sec argument required\n"); + my $storage_keep_hint_slots = $ARGV[2] or die("storage_keep_hint_slots argument required\n"); length($storage_dir) >= 3 or die("storage_dir is suspiciously short\n"); my $lock_file = "$storage_dir/maintenance.lock"; open(my $lock, ">>", $lock_file) or die("open $lock_file: $!\n"); @@ -1037,14 +1052,34 @@ SCRIPTS = { } %(SLOT_INFOS)s my @slot_infos = slot_infos($storage_dir); - my $slot_dir_newest = (map { $_->{dir} } grep { !$_->{is_garbage} } @slot_infos)[0]; + my $slot_dir_newest = (map { $_->{dir} } grep { !$_->{is_tmp_or_bak} } @slot_infos)[0]; + my %%slot_dir_newest_per_hint = + map { $_->{meta_hints}[0], $_->{dir} } + grep { !$_->{is_tmp_or_bak} && defined($_->{meta_hints}[0]) } + reverse(@slot_infos); my @rm_dirs = (); + my $kept_per_hint_slots = 0; foreach my $info (@slot_infos) { my $dir = $info->{dir}; my $age_sec = $info->{age_sec}; my $is_bak = $info->{is_bak}; - if (defined($slot_dir_newest) && $dir eq $slot_dir_newest) { - # Never delete the latest 
slot, even if it is old. + my $hint = $info->{meta_hints}[0]; + my $suffix = (defined($hint) ? "hint=$hint, " : "") . "age=${age_sec}s"; + if ( + defined($slot_dir_newest) && + $dir eq $slot_dir_newest + ) { + print("keeping $dir, the newest slot overall ($suffix)\n"); + next; + } + if ( + defined($hint) && + defined($slot_dir_newest_per_hint{$hint}) && + $dir eq $slot_dir_newest_per_hint{$hint} && + $kept_per_hint_slots < $storage_keep_hint_slots + ) { + print("keeping $dir, the newest slot with this hint ($suffix)\n"); + $kept_per_hint_slots++; next; } if ( @@ -1052,7 +1087,9 @@ SCRIPTS = { $is_bak && $age_sec > %(STORAGE_MAX_AGE_SEC_BAK)d ) { push(@rm_dirs, $dir); - print("will remove $dir (age: $age_sec sec) in background\n"); + print("will remove $dir in background ($suffix)\n"); + } else { + print("keeping $dir, new enough ($suffix)\n"); } } if (!@rm_dirs) { diff --git a/tests/0070-keeps-newest-slot-per-each-hint-on-gc.test.sh b/tests/0070-keeps-newest-slot-per-each-hint-on-gc.test.sh new file mode 100755 index 0000000..fef1047 --- /dev/null +++ b/tests/0070-keeps-newest-slot-per-each-hint-on-gc.test.sh @@ -0,0 +1,51 @@ +#!/bin/bash +source ./common.sh + +ci-storage \ + --slot-id=myslot-a1 \ + --hint="a" \ + store +ci-storage \ + --slot-id=myslot-a2 \ + --hint="a" \ + store + +ci-storage \ + --slot-id=myslot-b1 \ + --hint="b" \ + store +ci-storage \ + --slot-id=myslot-b2 \ + --hint="b" \ + store + +ci-storage \ + --slot-id=myslot-c1 \ + --hint="c" \ + store +ci-storage \ + --slot-id=myslot-c2 \ + --hint="c" \ + store + +ci-storage \ + --slot-id=myslot-d1 \ + --hint="d" \ + store +ci-storage \ + --slot-id=myslot-d2 \ + --hint="d" \ + store + +ci-storage \ + --slot-id=myslot-x \ + --hint="x" \ + --storage-keep-hint-slots=2 \ + store + +grep -qF 'myslot-x, the newest slot overall' "$OUT" +grep -qF 'myslot-d2, the newest slot with this hint (hint=d' "$OUT" +grep -qF 'myslot-c2, the newest slot with this hint (hint=c' "$OUT" +grep -qF 'myslot-b2, new enough' 
"$OUT" +grep -qF 'myslot-a2, new enough' "$OUT" +