From 6526f0961952b34a7a7948d0fb4a5c5fd79b0dd7 Mon Sep 17 00:00:00 2001 From: rbikar <31000668+rbikar@users.noreply.github.com> Date: Thu, 23 Nov 2023 15:27:29 +0100 Subject: [PATCH] Fix keeping n latest rpms for different arches [RHELDST-21855] (#58) * Fix keeping n latest rpms for different arches [RHELDST-21855] Yum repositories can contain rpms with different arches of the same package. We need to make sure that only the highest and the same version of rpms are kept. Previously output contained in some cases pkg.arch_x.v_1 and pkg.arch_y.v2 which is incorrect because of mixed versions and for some workflows system can end up in the uninstallable state. This change makes sure that only pkg.arch_x.v_1 and pkg_arch_y.v_1 (if present) will be in output and it won't end with mixed versions. This is also fixed for parameter n>=2 which not used currently in practice. * [pre-commit.ci] auto fixes from black and isort code formatting --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- tests/test_utils.py | 108 +++++++++++++++++-- ubi_manifest/worker/tasks/depsolver/utils.py | 17 ++- 2 files changed, 110 insertions(+), 15 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 9dd60a52..eced5258 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -99,7 +99,7 @@ def test_keep_n_latest_rpms(): assert rpms[0].version == "11" -def test_keep_n_latest_rpms_multiple_arches(): +def test_keep_n_latest_rpms_multiple_arches_default_n(): """Test keeping only the latest version of rpm for multiple arches""" unit_1 = get_ubi_unit( @@ -107,7 +107,7 @@ def test_keep_n_latest_rpms_multiple_arches(): "test_repo_id", name="test", version="10", - release="20", + release="el8", arch="x86_64", ) unit_2 = get_ubi_unit( @@ -115,7 +115,7 @@ def test_keep_n_latest_rpms_multiple_arches(): "test_repo_id", name="test", version="11", - release="20", + release="el8", arch="x86_64", ) unit_3 = get_ubi_unit( @@ -123,20 +123,19 @@ def test_keep_n_latest_rpms_multiple_arches(): "test_repo_id", name="test", version="10", - release="20", + release="el8", arch="i686", ) unit_4 = get_ubi_unit( RpmUnit, "test_repo_id", name="test", - version="9", - release="20", + version="11", + release="el8", arch="i686", ) rpms = [unit_1, unit_2, unit_3, unit_4] - rpms.sort(key=vercmp_sort()) _keep_n_latest_rpms(rpms) # sort by version, the order after _keep_n_latest_rpms() is not guaranteed in this case @@ -146,12 +145,101 @@ def test_keep_n_latest_rpms_multiple_arches(): assert len(rpms) == 2 # i686 rpm goes with its highest version - assert rpms[0].version == "10" - assert rpms[0].arch == "i686" + assert rpms[0].version == "11" + assert rpms[0].release == "el8" + assert rpms[0].arch == "x86_64" # x86_64 rpm goes with its highest version assert rpms[1].version == "11" - assert rpms[1].arch == "x86_64" + assert rpms[1].release == "el8" + assert rpms[1].arch == "i686" + + +# test data setup for test_keep_n_latest_rpms_multiple_arches_different_n() +def _testdata(n): + unit_x86_64_lower = get_ubi_unit( + RpmUnit, + "test_repo_id", + name="test", + version="10", + release="el8", + arch="x86_64", + ) + unit_x86_64_higher = get_ubi_unit( + RpmUnit, + "test_repo_id", + name="test", + version="11", + release="el8", + arch="x86_64", + ) + unit_i686_lower = get_ubi_unit( + RpmUnit, + "test_repo_id", + name="test", + version="10", + release="el8", + arch="i686", + ) + unit_i686_higher = get_ubi_unit( + RpmUnit, + "test_repo_id", + name="test", + version="11", + release="el8", + arch="i686", + ) + unit_noarch = get_ubi_unit( + RpmUnit, + "test_repo_id", + name="test", + version="12", + release="el8", + arch="noarch", + ) + if n == 1: + # unit with arch: noarch will only be in output + out = [unit_noarch] + elif n == 2: + # in addition to noarch, the units with x86_64 and i686 arches with the highest version + # will be in output + out = [unit_noarch, unit_i686_higher, unit_x86_64_higher] + elif n >= 3: + # all pkgs will be in output + out = [ + unit_noarch, + unit_i686_higher, + unit_x86_64_higher, + unit_i686_lower, + unit_x86_64_lower, + ] + + return ( + n, + [ + unit_x86_64_lower, + unit_i686_lower, + unit_x86_64_higher, + unit_i686_higher, + unit_noarch, + ], + out, + ) + + +@pytest.mark.parametrize( + "n, input, expected_result", + [_testdata(1), _testdata(2), _testdata(3)], +) +def test_keep_n_latest_rpms_multiple_arches_different_n(n, input, expected_result): + """Test keeping only the latest version of rpm for multiple arches + with different `n` parameter""" + rpms = input + _keep_n_latest_rpms(rpms, n) + # there should be specific number of rpms + assert len(rpms) == len(expected_result) + # check expected result, comparing sets because of unstable sort + assert set(rpms) == set(expected_result) def test_flatten_list_of_sets(): diff --git a/ubi_manifest/worker/tasks/depsolver/utils.py b/ubi_manifest/worker/tasks/depsolver/utils.py index 03afd920..01f65564 100644 --- a/ubi_manifest/worker/tasks/depsolver/utils.py +++ b/ubi_manifest/worker/tasks/depsolver/utils.py @@ -103,7 +103,6 @@ def get_n_latest_from_content(content, blacklist, modular_rpms=None): out = [] for rpm_list in name_rpms_maps.values(): - rpm_list.sort(key=vercmp_sort()) _keep_n_latest_rpms(rpm_list) out.extend(rpm_list) @@ -187,10 +186,11 @@ def is_requirement_resolved(requirement, provider): def _keep_n_latest_rpms(rpms, n=1): """ - Keep n latest non-modular rpms. + Keep n latest non-modular rpms. If there are rpms with different arches + only pkgs with `n` highest versions are kept. Arguments: - rpms (List[Rpm]): Sorted, oldest goes first + rpms (List[Rpm]): List of rpms Keyword arguments: n (int): Number of non-modular package versions to keep @@ -201,8 +201,15 @@ def _keep_n_latest_rpms(rpms, n=1): # Use a queue of n elements per arch pkgs_per_arch = defaultdict(lambda: deque(maxlen=n)) - for rpm in rpms: - pkgs_per_arch[rpm.arch].append(rpm) + allowed_versions = set() + for rpm in sorted(rpms, key=vercmp_sort(), reverse=True): + allowed_versions.add(rpm.version) + + if len(allowed_versions) > n: + break + + if rpm.version in allowed_versions: + pkgs_per_arch[rpm.arch].append(rpm) latest_pkgs_per_arch = list(chain.from_iterable(pkgs_per_arch.values()))