Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(RELEASE-1345): avoid failing on invalid purl string #343

Merged
merged 1 commit into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions pyxis/test_upload_rpm_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
update_container_content_sets,
load_sbom_packages,
construct_rpm_items_and_content_sets,
get_purl_type,
)

GRAPHQL_API = "myapiurl"
Expand Down Expand Up @@ -458,3 +459,35 @@ def test_construct_rpm_items_and_content_sets__no_packages_result_in_empty_list(

assert rpms == []
assert content_sets == []


def test_get_purl_type__rpm():
    """A well-formed rpm purl with qualifiers yields the "rpm" type."""
    purl = (
        "pkg:rpm/rhel/acl@2.3.1-4.el9?arch=x86_64&upstream=acl-2.3.1-4.el9.src.rpm"
        "&distro=rhel-9.4&repository_id=myrepo3"
    )

    purl_type = get_purl_type(purl)

    assert purl_type == "rpm"


def test_get_purl_type__invalid_docker():
    """The type is still extracted from a purl that is otherwise invalid.

    packageurl.PackageURL.from_string() would fail on this purl, but the type
    component alone can be read successfully.
    """
    invalid_purl = "pkg:github/docker:/#docker.mirror.hashicorp.services/rhysd/actionlint:latest"

    purl_type = get_purl_type(invalid_purl)

    assert purl_type == "github"


def test_get_purl_type__missing_type():
    """A purl without a type component makes the function raise ValueError."""
    typeless_purl = "pkg:docker:#docker.mirror.hashicorp.services"

    with pytest.raises(ValueError):
        get_purl_type(typeless_purl)
33 changes: 33 additions & 0 deletions pyxis/test_upload_rpm_data_cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
load_sbom_components,
check_bom_ref_duplicates,
construct_rpm_items_and_content_sets,
get_purl_type,
)

GRAPHQL_API = "myapiurl"
Expand Down Expand Up @@ -435,3 +436,35 @@ def test_construct_rpm_items_and_content_sets__no_components_result_in_empty_lis

assert rpms == []
assert content_sets == []


def test_get_purl_type__rpm():
    """A well-formed rpm purl with qualifiers yields the "rpm" type."""
    purl = (
        "pkg:rpm/rhel/acl@2.3.1-4.el9?arch=x86_64&upstream=acl-2.3.1-4.el9.src.rpm"
        "&distro=rhel-9.4&repository_id=myrepo3"
    )

    purl_type = get_purl_type(purl)

    assert purl_type == "rpm"


def test_get_purl_type__invalid_docker():
    """The type is still extracted from a purl that is otherwise invalid.

    packageurl.PackageURL.from_string() would fail on this purl, but the type
    component alone can be read successfully.
    """
    invalid_purl = "pkg:github/docker:/#docker.mirror.hashicorp.services/rhysd/actionlint:latest"

    purl_type = get_purl_type(invalid_purl)

    assert purl_type == "github"


def test_get_purl_type__missing_type():
    """A purl without a type component makes the function raise ValueError."""
    typeless_purl = "pkg:docker:#docker.mirror.hashicorp.services"

    with pytest.raises(ValueError):
        get_purl_type(typeless_purl)
33 changes: 31 additions & 2 deletions pyxis/upload_rpm_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,10 @@ def construct_rpm_items_and_content_sets(
for externalRef in package.get("externalRefs", []):
if externalRef.get("referenceType") != "purl":
continue
purl_dict = PackageURL.from_string(externalRef["referenceLocator"]).to_dict()
if purl_dict["type"] != "rpm":
type = get_purl_type(externalRef["referenceLocator"])
if type != "rpm":
continue
purl_dict = PackageURL.from_string(externalRef["referenceLocator"]).to_dict()
if purl_dict["name"] in IGNORED_PACKAGES:
continue
rpm_item = {
Expand Down Expand Up @@ -271,6 +272,34 @@ def construct_rpm_items_and_content_sets(
return rpms_items, sorted(content_sets)


def get_purl_type(purl: str) -> str:
    """
    Return the purl type parsed from a purl string.

    Copied and adapted from the packageurl package. The reason we need this
    function and cannot simply use the type component of
    packageurl.PackageURL.from_string(purl) is that there can be invalid non-rpm
    purls generated by syft. By getting just the type first and skipping those
    purls, we avoid failing on those invalid purls.

    The returned type is the lower-cased text between the "pkg:" scheme (plus
    any leading slashes) and the first following "/".

    Raise ValueError if purl is not a string, does not use the "pkg" scheme,
    or has no type component terminated by "/".
    """
    if not isinstance(purl, str):
        # Without this guard a None/non-string value would surface as an
        # AttributeError from .partition() instead of the documented ValueError.
        raise ValueError(f"A purl string argument is required: {repr(purl)}.")

    scheme, sep, remainder = purl.partition(":")
    if not sep or scheme != "pkg":
        raise ValueError(f'purl is missing the required "pkg" scheme component: {repr(purl)}.')

    # Strip the "/", "//" or "///" that may follow the scheme, as in
    # "pkg://" or "pkg:///".
    remainder = remainder.strip().lstrip("/")

    # Only the type is needed; whatever follows the first "/" is ignored so
    # that malformed name/version parts cannot cause a failure here.
    purl_type, sep, _ = remainder.partition("/")
    if not purl_type or not sep:
        raise ValueError(f"purl is missing the required type component: {repr(purl)}.")

    return purl_type.lower()


def main(): # pragma: no cover
"""Main func"""
args = parse_arguments()
Expand Down
33 changes: 31 additions & 2 deletions pyxis/upload_rpm_data_cyclonedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,9 @@ def construct_rpm_items_and_content_sets(
content_sets = set()
for component in components:
if "purl" in component:
purl_dict = PackageURL.from_string(component["purl"]).to_dict()
if purl_dict["type"] == "rpm":
type = get_purl_type(component["purl"])
if type == "rpm":
purl_dict = PackageURL.from_string(component["purl"]).to_dict()
if purl_dict["name"] in IGNORED_PACKAGES:
continue
rpm_item = {
Expand Down Expand Up @@ -292,6 +293,34 @@ def construct_rpm_items_and_content_sets(
return rpms_items, sorted(content_sets)


def get_purl_type(purl: str) -> str:
    """
    Return the purl type parsed from a purl string.

    Copied and adapted from the packageurl package. The reason we need this
    function and cannot simply use the type component of
    packageurl.PackageURL.from_string(purl) is that there can be invalid non-rpm
    purls generated by syft. By getting just the type first and skipping those
    purls, we avoid failing on those invalid purls.

    The returned type is the lower-cased text between the "pkg:" scheme (plus
    any leading slashes) and the first following "/".

    Raise ValueError if purl is not a string, does not use the "pkg" scheme,
    or has no type component terminated by "/".
    """
    if not isinstance(purl, str):
        # Without this guard a None/non-string value would surface as an
        # AttributeError from .partition() instead of the documented ValueError.
        raise ValueError(f"A purl string argument is required: {repr(purl)}.")

    scheme, sep, remainder = purl.partition(":")
    if not sep or scheme != "pkg":
        raise ValueError(f'purl is missing the required "pkg" scheme component: {repr(purl)}.')

    # Strip the "/", "//" or "///" that may follow the scheme, as in
    # "pkg://" or "pkg:///".
    remainder = remainder.strip().lstrip("/")

    # Only the type is needed; whatever follows the first "/" is ignored so
    # that malformed name/version parts cannot cause a failure here.
    purl_type, sep, _ = remainder.partition("/")
    if not purl_type or not sep:
        raise ValueError(f"purl is missing the required type component: {repr(purl)}.")

    return purl_type.lower()


def main(): # pragma: no cover
"""Main func"""
args = parse_arguments()
Expand Down
Loading