Skip to content

Commit

Permalink
Fix keeping n latest rpms for different arches [RHELDST-21855] (#58)
Browse files Browse the repository at this point in the history
* Fix keeping n latest rpms for different arches [RHELDST-21855]

Yum repositories can contain rpms with different arches of the same
package. We need to make sure that only the highest and the same version
of rpms are kept.

Previously output contained in some cases pkg.arch_x.v_1 and pkg.arch_y.v2
which is incorrect because of mixed versions and for some workflows
system can end up in the uninstallable state.

This change makes sure that only pkg.arch_x.v_1 and pkg_arch_y.v_1 (if
present) will be in output and it won't end with mixed versions.

This is also fixed for parameter n>=2 which not used currently in practice.

* [pre-commit.ci] auto fixes from black and isort code formatting

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
rbikar and pre-commit-ci[bot] authored Nov 23, 2023
1 parent f3a440b commit 6526f09
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 15 deletions.
108 changes: 98 additions & 10 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,44 +99,43 @@ def test_keep_n_latest_rpms():
assert rpms[0].version == "11"


def test_keep_n_latest_rpms_multiple_arches():
def test_keep_n_latest_rpms_multiple_arches_default_n():
"""Test keeping only the latest version of rpm for multiple arches"""

unit_1 = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="10",
release="20",
release="el8",
arch="x86_64",
)
unit_2 = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="11",
release="20",
release="el8",
arch="x86_64",
)
unit_3 = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="10",
release="20",
release="el8",
arch="i686",
)
unit_4 = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="9",
release="20",
version="11",
release="el8",
arch="i686",
)

rpms = [unit_1, unit_2, unit_3, unit_4]
rpms.sort(key=vercmp_sort())
_keep_n_latest_rpms(rpms)

# sort by version, the order after _keep_n_latest_rpms() is not guaranteed in this case
Expand All @@ -146,12 +145,101 @@ def test_keep_n_latest_rpms_multiple_arches():
assert len(rpms) == 2

# i686 rpm goes with its highest version
assert rpms[0].version == "10"
assert rpms[0].arch == "i686"
assert rpms[0].version == "11"
assert rpms[0].release == "el8"
assert rpms[0].arch == "x86_64"

# x86_64 rpm goes with its highest version
assert rpms[1].version == "11"
assert rpms[1].arch == "x86_64"
assert rpms[1].release == "el8"
assert rpms[1].arch == "i686"


# test data setup for test_keep_n_latest_rpms_multiple_arches_different_n()
def _testdata(n):
unit_x86_64_lower = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="10",
release="el8",
arch="x86_64",
)
unit_x86_64_higher = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="11",
release="el8",
arch="x86_64",
)
unit_i686_lower = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="10",
release="el8",
arch="i686",
)
unit_i686_higher = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="11",
release="el8",
arch="i686",
)
unit_noarch = get_ubi_unit(
RpmUnit,
"test_repo_id",
name="test",
version="12",
release="el8",
arch="noarch",
)
if n == 1:
# unit with arch: noarch will only be in output
out = [unit_noarch]
elif n == 2:
# in addition to noarch, the units with x86_64 and i686 arches with the highest version
# will be in output
out = [unit_noarch, unit_i686_higher, unit_x86_64_higher]
elif n >= 3:
# all pkgs will be in output
out = [
unit_noarch,
unit_i686_higher,
unit_x86_64_higher,
unit_i686_lower,
unit_x86_64_lower,
]

return (
n,
[
unit_x86_64_lower,
unit_i686_lower,
unit_x86_64_higher,
unit_i686_higher,
unit_noarch,
],
out,
)


@pytest.mark.parametrize(
"n, input, expected_result",
[_testdata(1), _testdata(2), _testdata(3)],
)
def test_keep_n_latest_rpms_multiple_arches_different_n(n, input, expected_result):
"""Test keeping only the latest version of rpm for multiple arches
with different `n` parameter"""
rpms = input
_keep_n_latest_rpms(rpms, n)
# there should be specific number of rpms
assert len(rpms) == len(expected_result)
# check expected result, comparing sets because of unstable sort
assert set(rpms) == set(expected_result)


def test_flatten_list_of_sets():
Expand Down
17 changes: 12 additions & 5 deletions ubi_manifest/worker/tasks/depsolver/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ def get_n_latest_from_content(content, blacklist, modular_rpms=None):

out = []
for rpm_list in name_rpms_maps.values():
rpm_list.sort(key=vercmp_sort())
_keep_n_latest_rpms(rpm_list)
out.extend(rpm_list)

Expand Down Expand Up @@ -187,10 +186,11 @@ def is_requirement_resolved(requirement, provider):

def _keep_n_latest_rpms(rpms, n=1):
"""
Keep n latest non-modular rpms.
Keep n latest non-modular rpms. If there are rpms with different arches
only pkgs with `n` highest versions are kept.
Arguments:
rpms (List[Rpm]): Sorted, oldest goes first
rpms (List[Rpm]): List of rpms
Keyword arguments:
n (int): Number of non-modular package versions to keep
Expand All @@ -201,8 +201,15 @@ def _keep_n_latest_rpms(rpms, n=1):
# Use a queue of n elements per arch
pkgs_per_arch = defaultdict(lambda: deque(maxlen=n))

for rpm in rpms:
pkgs_per_arch[rpm.arch].append(rpm)
allowed_versions = set()
for rpm in sorted(rpms, key=vercmp_sort(), reverse=True):
allowed_versions.add(rpm.version)

if len(allowed_versions) > n:
break

if rpm.version in allowed_versions:
pkgs_per_arch[rpm.arch].append(rpm)

latest_pkgs_per_arch = list(chain.from_iterable(pkgs_per_arch.values()))

Expand Down

0 comments on commit 6526f09

Please sign in to comment.