Skip to content

Commit

Permalink
Mark all kickstart non-RPM content as phase2 [RHELDST-28021]
Browse files Browse the repository at this point in the history
Kickstart repos don't adhere to the "mutable entry point, immutable
everything else" design of other content types, such as yum and
file repos. For example, files such as "/EULA", "/images/install.img"
may receive in-place updates.

As such, the usual categorization between phase1 and phase2 content
based on entry points does not fit these repos correctly.

From the point of view of exodus-gw, the set of files receiving
in-place updates is arbitrary, so we'd better treat them *all* as phase
2 content and try to update them together. RPM files are the only
exception, as we still expect that an RPM published to the same
path is always the same thing.

The effects of categorizing kickstart files as phase2 content include:

- on publish, CDN cache will now be flushed for all non-RPM kickstart
  paths (rather than only the entry points).

- on publish, non-RPM kickstart paths will be recorded into
  PublishedPath table, which allows them to trigger cache flush again
  when releasever_alias are updated in config. (This is the primary
  motivation for the change, as this important cache flush currently
  must be done manually.)

- in-place updates to the repos are now more likely to appear as atomic
  from the customer's point of view.

- kickstart publishes may be somewhat slower, as it is necessary to
  delay more of the publishing work until the user requests the final
  commit.
  • Loading branch information
rohanpm committed Nov 25, 2024
1 parent 271e2e4 commit 1b675bc
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 7 deletions.
18 changes: 18 additions & 0 deletions exodus_gw/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,24 @@ class Settings(BaseSettings):
]
"""List of file names that should be saved for last when publishing."""

phase2_patterns: list[re.Pattern[str]] = [
# kickstart repos; note the logic here matches
# the manual workaround RHELDST-27642
re.compile(r"/kickstart/.*(?<!\.rpm)$"),
]
"""List of patterns which, if any have matched, force a path to
be handled during phase 2 of commit.
These patterns are intended for use with repositories not cleanly
separated between mutable entry points and immutable content.
For example, in-place updates to kickstart repositories may not
only modify entry points such as extra_files.json but also
arbitrary files referenced by that entry point, all of which should
be processed during phase 2 of commit in order for updates to
appear atomic.
"""

autoindex_filename: str = ".__exodus_autoindex"
"""Filename for indexes automatically generated during publish.
Expand Down
26 changes: 22 additions & 4 deletions exodus_gw/worker/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,27 @@ def check_item(self, item: Item):
# link_to resolution logic.
raise ValueError("BUG: missing object_key for %s" % item.web_uri)

def is_phase2(self, item: Item) -> bool:
# Return True if item should be handled in phase 2 of commit.
name = basename(item.web_uri)
if (
name == self.settings.autoindex_filename
or name in self.settings.entry_point_files
):
# Typical entrypoint
return True

for pattern in self.settings.phase2_patterns:
# Arbitrary patterns from settings.
# e.g. this is where /kickstart/ is expected to be handled.
if pattern.search(item.web_uri):
LOG.debug(
"%s: phase2 forced via pattern %s", item.web_uri, pattern
)
return True

return False

@property
def item_select(self):
# Returns base of the SELECT query to find all items for commit.
Expand Down Expand Up @@ -334,9 +355,6 @@ def write_publish_items(self) -> list[Item]:

# Save any entry point items to publish last.
final_items: list[Item] = []
final_basenames = self.settings.entry_point_files + [
self.settings.autoindex_filename
]

wrote_count = 0

Expand All @@ -358,7 +376,7 @@ def write_publish_items(self) -> list[Item]:

self.check_item(item)

if basename(item.web_uri) in final_basenames:
if self.is_phase2(item):
LOG.debug(
"Delayed write for %s",
item.web_uri,
Expand Down
32 changes: 29 additions & 3 deletions tests/worker/test_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ def test_commit(
# Note that this does not include paths after alias resolution,
# because Flusher does alias resolution itself internally
# (tested elsewhere)
# Note that the default phase2_patterns cause all non-RPM kickstart
# files to be flushed.
"/content/testproduct/1/kickstart/EULA",
"/content/testproduct/1/kickstart/extra_files.json",
"/content/testproduct/1/repo/",
"/content/testproduct/1/repo/repomd.xml",
Expand Down Expand Up @@ -150,6 +153,22 @@ def test_commit(
"test",
"/content/testproduct/rhui/1/kickstart/extra_files.json",
),
(
"test",
"/content/testproduct/1.1.0/kickstart/EULA",
),
(
"test",
"/content/testproduct/1/kickstart/EULA",
),
(
"test",
"/content/testproduct/rhui/1.1.0/kickstart/EULA",
),
(
"test",
"/content/testproduct/rhui/1/kickstart/EULA",
),
("test", "/content/testproduct/1/repo/"),
("test", "/content/testproduct/1.1.0/repo/"),
("test", "/content/testproduct/1/repo/repomd.xml"),
Expand Down Expand Up @@ -603,8 +622,15 @@ def test_commit_phase1(
[
{"dirty": False, "web_uri": "/other/path"},
{"dirty": False, "web_uri": "/some/path"},
# kickstart content:
# - EULA, though not an entrypoint, was forcibly delayed to phase2
# via phase2_patterns setting. And therefore is still 'dirty'.
# - RPMs aren't matched by phase2_patterns and can still be
# processed in phase1.
# - extra_files.json is an entrypoint and so is also delayed until
# phase2.
{
"dirty": False,
"dirty": True,
"web_uri": "/content/testproduct/1/kickstart/EULA",
},
{
Expand Down Expand Up @@ -633,7 +659,7 @@ def test_commit_phase1(

# It should have told us how many it wrote and how many remain
assert (
"Phase 1: committed 4 items, phase 2: 3 items remaining" in caplog.text
"Phase 1: committed 3 items, phase 2: 4 items remaining" in caplog.text
)

# Let's do the same commit again...
Expand All @@ -651,7 +677,7 @@ def test_commit_phase1(
# This time there should not have been any phase1 items processed at all,
# as none of them were dirty.
assert (
"Phase 1: committed 0 items, phase 2: 3 items remaining" in caplog.text
"Phase 1: committed 0 items, phase 2: 4 items remaining" in caplog.text
)

# And it should NOT have invoked the autoindex enricher in either commit
Expand Down

0 comments on commit 1b675bc

Please sign in to comment.