From 34af728ac3ea7e9b697f6ecb17a16346bf5a4065 Mon Sep 17 00:00:00 2001
From: Rohan McGovern
Date: Mon, 13 May 2024 10:02:46 +1000
Subject: [PATCH 1/2] safety: update suppressed vulnerabilities

- drop one old expired sqlalchemy exception
- add a pip exception for a CVE which is not exploitable for this service
---
 .safety-policy.yml | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/.safety-policy.yml b/.safety-policy.yml
index 9506001f..92bb2946 100644
--- a/.safety-policy.yml
+++ b/.safety-policy.yml
@@ -1,20 +1,27 @@
 security:
-    ignore-cvss-severity-below: 4
-    ignore-cvss-unknown-severity: False
-    ignore-vulnerabilities:
-        51668:
-            # PVE-2022-51668, sqlalchemy str(engine.URL()) can leak password.
-            reason: >-
-                Our own code does not currently trigger any leaks.
-                We *should* fix the issue, but there is no stable release of
-                sqlalchemy 2 at time of writing.
-                See RHELDST-15252.
-            expires: '2023-03-01'
-        65213:
-            # CVE-2023-6129, pyopenssl>=22.0.0,
-            # POLY1305 MAC issue on PowerPC CPUs
-            reason: >-
-                Vulnerability is specific to PPC architecture, which is not
-                used or relevant for this service.
-            expires: '2025-04-04'
-    continue-on-vulnerability-error: False
+  ignore-cvss-severity-below: 4
+  ignore-cvss-unknown-severity: False
+  ignore-vulnerabilities:
+    65213:
+      # CVE-2023-6129, pyopenssl>=22.0.0,
+      # POLY1305 MAC issue on PowerPC CPUs
+      reason: >-
+        Vulnerability is specific to PPC architecture, which is not
+        used or relevant for this service.
+      expires: "2025-04-04"
+    67599:
+      # CVE-2018-20225, pip:
+      #
+      # ** DISPUTED ** An issue was discovered in pip (all versions) because
+      # it installs the version with the highest version number, even if the
+      # user had intended to obtain a private package from a private index.
+      # This only affects use of the --extra-index-url option, and
+      # exploitation requires that the package does not already exist in the
+      # public index (and thus the attacker can put the package there with
+      # an arbitrary version number). NOTE: it has been reported that this
+      # is intended functionality and the user is responsible for using
+      # --extra-index-url securely.
+      #
+      reason: >-
+        Not exploitable: all dependencies exist on the public index.
+  continue-on-vulnerability-error: False

From cf6858c732f7ae5651558a06f0f63f7a870ef6ce Mon Sep 17 00:00:00 2001
From: Rohan McGovern
Date: Mon, 13 May 2024 09:39:42 +1000
Subject: [PATCH 2/2] Make it possible to bypass /origin/files policy
 [RHELDST-22253]

RHELDST-23443 started to enforce that content published under
/origin/files must abide by the established naming scheme (e.g. checksum
in path must match checksum of content).

Problem: content from legacy storage is still being migrated via
exodus-gw, and some of that content fails to comply. We should permit
migration of such content as-is.

As there is a use-case for bypassing these checks, but only for the user
performing the migration, add a new role supporting this. If the calling
user has e.g. "live-ignore-policy", they will be permitted to bypass this
specific check in "live". This will be granted to the user performing
the migration.

The relevant code was rewritten a bit and placed next to the other
validation code so that it's cleaner to catch/ignore the exception when
needed.
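For reviewers, a minimal self-contained sketch of the intended gating
logic follows (illustrative only; names such as ItemPolicyError,
validate_policy and enforce_policy mirror or approximate the real code
in the diff below, and this snippet is not part of the change itself):

    # Sketch of the "<env>-ignore-policy" bypass, assuming each item
    # exposes a validate_policy() method raising ItemPolicyError on
    # policy violations.
    class ItemPolicyError(Exception):
        """Item is structurally valid but violates a publish policy."""

    def enforce_policy(items, env_name: str, caller_roles: set[str]) -> None:
        # Callers holding the "<env>-ignore-policy" role may bypass
        # policy failures; for everyone else the error propagates.
        can_ignore = f"{env_name}-ignore-policy" in caller_roles
        for item in items:
            try:
                item.validate_policy()
            except ItemPolicyError:
                if not can_ignore:
                    raise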
---
 exodus_gw/deps.py             |  2 +
 exodus_gw/routers/publish.py  | 80 +++++-------------------------------
 exodus_gw/schemas.py          | 71 ++++++++++++++++++++++++++++++-
 tests/routers/test_publish.py | 43 +++++++++++++++++++
 4 files changed, 126 insertions(+), 70 deletions(-)

diff --git a/exodus_gw/deps.py b/exodus_gw/deps.py
index d8a6c668..406484bd 100644
--- a/exodus_gw/deps.py
+++ b/exodus_gw/deps.py
@@ -8,6 +8,7 @@
 from fastapi import Depends, HTTPException, Path, Query, Request
 
 from .auth import call_context as get_call_context
+from .auth import caller_roles as get_caller_roles
 from .aws.client import S3ClientWrapper
 from .settings import Environment, Settings, get_environment
 
@@ -121,6 +122,7 @@ async def get_deadline_from_query(
 #
 db = Depends(get_db)
 call_context = Depends(get_call_context)
+caller_roles = Depends(get_caller_roles)
 env = Depends(get_environment_from_path)
 deadline = Depends(get_deadline_from_query)
 settings = Depends(get_settings)
diff --git a/exodus_gw/routers/publish.py b/exodus_gw/routers/publish.py
index e6067a60..b0833d55 100644
--- a/exodus_gw/routers/publish.py
+++ b/exodus_gw/routers/publish.py
@@ -205,6 +205,7 @@ def update_publish_items(
     db: Session = deps.db,
     settings: Settings = deps.settings,
     call_context: auth.CallContext = deps.call_context,
+    caller_roles: set[str] = deps.caller_roles,
 ) -> dict[None, None]:
     """Add publish items to an existing publish object.
 
@@ -273,75 +274,16 @@ def update_publish_items(
 
     db_publish.resolve_links(ln_items=resolvable)
 
-    # Enforce correct usage of the /origin/files directory layout.
-    #
-    # Paths published under /origin/files must always match the format:
-    # /origin/files/sha256/(first two characters of sha256sum)/(full sha256sum)/(basename)
-    #
-    # Additionally, every object_key must either be "absent" or equal to the full sha256sum
-    # present in the web_uri.
-    origin_base_regex = re.compile("^(/content)?/origin/files/sha256/.*$")
-    origin_pattern = (
-        "^(/content)?/origin/files/sha256/[0-f]{2}/[0-f]{64}/[^/]{1,300}$"
-    )
-    origin_regex = re.compile(origin_pattern)
-    origin_items = [i for i in items if re.match(origin_base_regex, i.web_uri)]
-    for i in origin_items:
-        # All content under /origin/files/sha256 must match the regex
-        if not re.match(origin_regex, i.web_uri):
-            LOG.error(
-                "Origin path %s does not match regex %s",
-                i.web_uri,
-                origin_pattern,
-                extra={
-                    "publish_id": publish_id,
-                    "event": "publish",
-                    "success": False,
-                },
-            )
-            raise HTTPException(
-                400,
-                detail="Origin path %s does not match regex %s"
-                % (i.web_uri, origin_pattern),
-            )
-        # Verify that the two-character partial sha256sum matches the first two characters of the
-        # full sha256sum.
-        parts = i.web_uri.partition("/files/sha256/")[2].split("/")
-        if not parts[1].startswith(parts[0]):
-            LOG.error(
-                "Origin path %s contains mismatched sha256sum (%s, %s)",
-                i.web_uri,
-                parts[0],
-                parts[1],
-                extra={
-                    "publish_id": publish_id,
-                    "event": "publish",
-                    "success": False,
-                },
-            )
-            raise HTTPException(
-                400,
-                detail="Origin path %s contains mismatched sha256sum (%s, %s)"
-                % (i.web_uri, parts[0], parts[1]),
-            )
-        # Ensure every object_key is either "absent" or is equal to the full sha256sum
-        # present in the web_uri.
-        if not i.object_key in ("absent", parts[1]):
-            LOG.error(
-                "Invalid object_key %s for web_uri %s",
-                i.object_key,
-                i.web_uri,
-                extra={
-                    "publish_id": publish_id,
-                    "event": "publish",
-                    "success": False,
-                },
-            )
-            raise HTTPException(
-                400,
-                detail="Invalid object_key %s for web_uri %s"
-                % (i.object_key, i.web_uri),
-            )
+    # Apply additional policy checks to the incoming items (e.g. do paths
+    # abide by certain conventions). Users with a certain role are allowed
+    # to bypass these checks.
+    can_ignore_policy = f"{env.name}-ignore-policy" in caller_roles
+    for item in items:
+        try:
+            item.validate_policy()
+        except schemas.ItemPolicyError:
+            if not can_ignore_policy:
+                raise
 
     # Prevent unauthorized users from publishing to restricted paths within
     # a particular CDN environment.
diff --git a/exodus_gw/schemas.py b/exodus_gw/schemas.py
index cf99b4b5..6fa0c601 100644
--- a/exodus_gw/schemas.py
+++ b/exodus_gw/schemas.py
@@ -1,14 +1,17 @@
+import logging
 import re
 from datetime import datetime
 from enum import Enum
 from os.path import join, normpath
 from uuid import UUID
 
-from fastapi import Path
+from fastapi import HTTPException, Path
 from pydantic import BaseModel, Field, model_validator
 
 from .settings import Settings
 
+LOG = logging.getLogger("exodus-gw")
+
 PathPublishId = Path(
     ...,
     title="publish ID",
@@ -32,11 +35,30 @@ def normalize_path(path: str):
 # TYPE/SUBTYPE[+SUFFIX][;PARAMETER=VALUE]
 MIMETYPE_PATTERN = re.compile(r"^[-\w]+/[-.\w]+(\+[-\w]*)?(;[-\w]+=[-\w]+)?")
 
+# Pattern matching anything under /origin/files/sha256 subtree
+ORIGIN_FILES_BASE_PATTERN = re.compile("^(/content)?/origin/files/sha256/.*$")
+
+# Pattern which all files under the above base *should* match in order to avoid
+# a validation error
+ORIGIN_FILES_PATTERN = re.compile(
+    "^(/content)?/origin/files/sha256/[0-f]{2}/[0-f]{64}/[^/]{1,300}$"
+)
+
+
 # Note: it would be preferable if we could reuse a settings object loaded by the
 # app, however we need this value from within a @classmethod validator.
 AUTOINDEX_FILENAME = Settings().autoindex_filename
 
 
+class ItemPolicyError(HTTPException):
+    """Exception type raised when an item provided by the user is
+    structurally valid but fails to comply with certain policies.
+    """
+
+    def __init__(self, message: str):
+        super().__init__(400, detail=message)
+
+
 class ItemBase(BaseModel):
     web_uri: str = Field(
         ...,
@@ -114,6 +136,53 @@ def validate_item(self) -> "ItemBase":
 
         return self
 
+    def validate_policy(self):
+        # Validate additional properties of the item against certain
+        # embedded policies.
+        #
+        # It's a little clumsy that this cannot happen in the @model_validator
+        # above. The point is that certain users are allowed to bypass the
+        # policy here, whereas the @model_validator is applied too early and
+        # too strictly to allow any bypassing.
+        self.validate_origin_files()
+
+    def validate_origin_files(self):
+        # Enforce correct usage of the /origin/files directory layout.
+        if not ORIGIN_FILES_BASE_PATTERN.match(self.web_uri):
+            # Not under /origin/files => passes this validation
+            return
+
+        # OK, it exists under /origin/files.
+        #
+        # Paths published under /origin/files must always match the format:
+        # /origin/files/sha256/(first two characters of sha256sum)/(full sha256sum)/(basename)
+        #
+        def policy_error(message: str):
+            LOG.warning(message)
+            raise ItemPolicyError(message)
+
+        # All content under /origin/files/sha256 must match the regex
+        if not re.match(ORIGIN_FILES_PATTERN, self.web_uri):
+            policy_error(
+                f"Origin path {self.web_uri} does not match regex {ORIGIN_FILES_PATTERN.pattern}"
+            )
+
+        # Verify that the two-character partial sha256sum matches the first two characters of the
+        # full sha256sum.
+        parts = self.web_uri.partition("/files/sha256/")[2].split("/")
+        if not parts[1].startswith(parts[0]):
+            policy_error(
+                f"Origin path {self.web_uri} contains mismatched sha256sum "
+                f"({parts[0]}, {parts[1]})"
+            )
+
+        # Additionally, every object_key must either be "absent" or equal to the full sha256sum
+        # present in the web_uri.
+        if not self.object_key in ("absent", parts[1]):
+            policy_error(
+                f"Invalid object_key {self.object_key} for web_uri {self.web_uri}"
+            )
 
 
 class Item(ItemBase):
     publish_id: UUID = Field(
diff --git a/tests/routers/test_publish.py b/tests/routers/test_publish.py
index 956087e9..a105f283 100644
--- a/tests/routers/test_publish.py
+++ b/tests/routers/test_publish.py
@@ -1499,6 +1499,49 @@ def test_update_invalid_path_unmatched_regex(db, auth_header):
     }
 
 
+def test_update_invalid_origin_files_bypassed(
+    db, auth_header, caplog: pytest.LogCaptureFixture
+):
+    """When a user publishes to a /content/origin/ path and violates the policy,
+    the violation is allowed if the user has {env}-ignore-policy role.
+    """
+
+    publish_id = "11224567-e89b-12d3-a456-426614174000"
+
+    publish = Publish(id=publish_id, env="test", state="PENDING")
+
+    db.add(publish)
+    db.commit()
+
+    with TestClient(app) as client:
+        # Try to add some items to it
+        r = client.put(
+            "/test/publish/%s" % publish_id,
+            json=[
+                {
+                    "web_uri": "/content/origin/files/sha256/01/44/0144062dca731c0d5c24148722537e181d752ca8cda0097005f9268a51658b0a/test.rpm",
+                    "object_key": "0144062dca731c0d5c24148722537e181d752ca8cda0097005f9268a51658b0a",
+                    "content_type": "application/octet-stream",
+                },
+            ],
+            headers=auth_header(
+                roles=["test-publisher", "test-ignore-policy"]
+            ),
+        )
+
+    # It should have succeeded
+    assert r.status_code == 200
+
+    # But at least warned about the suspicious path
+    assert (
+        "Origin path {} does not match regex {}".format(
+            "/content/origin/files/sha256/01/44/0144062dca731c0d5c24148722537e181d752ca8cda0097005f9268a51658b0a/test.rpm",
+            "^(/content)?/origin/files/sha256/[0-f]{2}/[0-f]{64}/[^/]{1,300}$",
+        )
+        in caplog.messages
+    )
+
+
 def test_update_invalid_path_sha256sum_mismatch(db, auth_header):
     """When a user publishes to a /content/origin/ path, ensure that the
     two-character portion of the web_uri matches the first two characters of the
     sha256sum portion of the