Skip to content

Commit

Permalink
feat(ISV-5447): add multi-arch support to component sbom update (#333)
Browse files Browse the repository at this point in the history
Signed-off-by: Wai Cheang <[email protected]>
  • Loading branch information
wcheang authored Dec 5, 2024
1 parent 50b8874 commit 3e107df
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 20 deletions.
44 changes: 30 additions & 14 deletions sbom/test_update_component_sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


class TestUpdateComponentSBOM(unittest.TestCase):
def test_get_component_to_purls_map(self) -> None:
def test_get_component_to_purls_map_single_arch(self) -> None:
release_note_images = [
{"component": "comp1", "purl": "purl1"},
{"component": "comp1", "purl": "purl2"},
Expand All @@ -24,6 +24,26 @@ def test_get_component_to_purls_map(self) -> None:
"comp2": ["purl3"],
}

def test_get_component_to_purls_map_multi_arch(self) -> None:
    """A multi-arch image produces an index entry and a per-arch child entry."""
    # One amd64 image flagged as multi-arch. The digest embedded in "purl"
    # (abcde) differs from "imageSha" (foosha1), so the expected purls below
    # show which digest ends up in which entry.
    amd64_image = {
        "component": "comp1",
        "purl": "pkg:oci/bar@sha256%3Aabcde?arch=amd64&repository_url=registry.io/foo",
        "multiarch": True,
        "arch": "amd64",
        "imageSha": "foosha1",
    }

    # Entry keyed by the plain component name: imageSha digest, no arch qualifier.
    index_purls = ["pkg:oci/bar@sha256%3Afoosha1?repository_url=registry.io/foo"]
    # Entry keyed by "<component>_<arch>": imageSha digest with the arch
    # qualifier, followed by the original digest without it.
    child_purls = [
        "pkg:oci/bar@sha256%3Afoosha1?arch=amd64&repository_url=registry.io/foo",
        "pkg:oci/bar@sha256%3Aabcde?repository_url=registry.io/foo",
    ]
    expected = {"comp1": index_purls, "comp1_amd64": child_purls}

    assert get_component_to_purls_map([amd64_image]) == expected

def test_update_cyclonedx_sbom(self) -> None:
sbom = {
"metadata": {
Expand Down Expand Up @@ -98,11 +118,6 @@ def test_update_spdx_sbom(self) -> None:
{
"name": "comp1",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": "pkg:oci/package@sha256:123",
},
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
Expand All @@ -120,11 +135,6 @@ def test_update_spdx_sbom(self) -> None:
{
"name": "comp2",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": "pkg:oci/package@sha256:456",
},
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
Expand Down Expand Up @@ -155,7 +165,10 @@ def test_update_sboms_with_cyclonedex_format(
) -> None:
# combining the content of data.json and sbom, since there can only be one read_data
# defined in the mock_open
test_cyclonedx_sbom = {"bomFormat": "CycloneDX", "releaseNotes": {"images": "foo"}}
test_cyclonedx_sbom = {
"bomFormat": "CycloneDX",
"releaseNotes": {"content": {"images": "foo"}},
}

with patch(
"builtins.open", mock_open(read_data=json.dumps(test_cyclonedx_sbom))
Expand All @@ -182,7 +195,7 @@ def test_update_sboms_with_spdx_format(
) -> None:
# combining the content of data.json and sbom, since there can only be one read_data
# defined in the mock_open
test_spdx_sbom = {"spdxVersion": "2.3", "releaseNotes": {"images": "foo"}}
test_spdx_sbom = {"spdxVersion": "2.3", "releaseNotes": {"content": {"images": "foo"}}}

with patch(
"builtins.open", mock_open(read_data=json.dumps(test_spdx_sbom))
Expand All @@ -207,7 +220,10 @@ def test_update_sboms_with_wrong_format(
) -> None:
# combining the content of data.json and sbom, since there can only be one read_data
# defined in the mock_open
test_spdx_sbom = {"notSbom": "NoSbomVersion", "releaseNotes": {"images": "foo"}}
test_spdx_sbom = {
"notSbom": "NoSbomVersion",
"releaseNotes": {"content": {"images": "foo"}},
}

with patch(
"builtins.open", mock_open(read_data=json.dumps(test_spdx_sbom))
Expand Down
35 changes: 33 additions & 2 deletions sbom/update_component_sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
from collections import defaultdict
from typing import DefaultDict, Dict, List
import re

from packageurl import PackageURL

Expand All @@ -19,6 +20,12 @@ def get_component_to_purls_map(images: List[Dict]) -> Dict[str, List[str]]:
"""
Get dictionary mapping component names to list of image purls.
If the image is single arch, just use the existing purls.
If the image is multi-arch, the purl formats are as follows (SPDX only):
- The index package has one purl with the index sha, and no arch info.
- The child packages have one purl with the index sha and arch info, and one purl with
the child image sha and no arch info.
Args:
images: List of image metadata from the given data.json.
Expand All @@ -30,7 +37,25 @@ def get_component_to_purls_map(images: List[Dict]) -> Dict[str, List[str]]:
for image in images:
component = image["component"]
purl = image["purl"]
component_purls[component].append(purl)
arch = image.get("arch")
multiarch = image.get("multiarch", False)

if multiarch and arch:
# replace sha for index purl
index_sha = image.get("imageSha")
if index_sha:
index_purl = re.sub("sha256%3A.*\\?", f"sha256%3A{index_sha}?", purl)

# the index purl needs no arch info
component_purls[component] = [re.sub("arch=.*&|&arch=.*$", "", index_purl)]

component_purls[f"{component}_{arch}"].append(index_purl)
# remove arch from child image digest, since it's already in index purl
component_purls[f"{component}_{arch}"].append(
re.sub("arch=.*&|&arch=.*$", "", purl)
)
else:
component_purls[component].append(purl)

LOG.debug("Component to purl mapping: %s", component_purls)
return dict(component_purls)
Expand Down Expand Up @@ -84,6 +109,10 @@ def update_spdx_sbom(sbom: Dict, component_to_purls_map: Dict[str, List[str]]) -
LOG.info("Updating SPDX sbom")
for package in sbom["packages"]:
if package["name"] in component_to_purls_map:
# Remove existing purls that contain internal repo info
package["externalRefs"] = list(
filter(lambda n: n.get("referenceType") != "purl", package["externalRefs"])
)
purls = component_to_purls_map[package["name"]]
purl_external_refs = [
{
Expand Down Expand Up @@ -111,7 +140,9 @@ def update_sboms(data_path: str, input_path: str, output_path: str) -> None:
with open(data_path, "r") as data_file:
data = json.load(data_file)

component_to_purls_map = get_component_to_purls_map(data["releaseNotes"]["images"])
component_to_purls_map = get_component_to_purls_map(
data["releaseNotes"]["content"].get("images", [])
)
# get all json files in input dir
input_jsons = glob.glob(os.path.join(input_path, "*.json"))
# loop through files
Expand Down
9 changes: 5 additions & 4 deletions utils/get-image-architectures
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ if [ "$ARTIFACT_TYPE" != "null" ] ; then
# Just report that the image is for linux/amd64, which is not exactly true - but,
# downstream release-service-catalog tasks expect to find something. Use this as a default.
jq -cr -n --arg digest "$digest" \
'{"platform": {"architecture": "amd64", "os": "linux"}, "digest": $ARGS.named["digest"]}'
'{"platform": {"architecture": "amd64", "os": "linux"}, "digest": $ARGS.named["digest"], "multiarch": false}'
elif [ $(jq -r '.mediaType' <<< $RAW_OUTPUT) == "application/vnd.oci.image.manifest.v1+json" ] ; then
# Single arch, so need to run skopeo inspect again without --raw
RAW_OUTPUT=$(skopeo inspect --no-tags docker://${IMAGE})
Expand All @@ -46,7 +46,7 @@ elif [ $(jq -r '.mediaType' <<< $RAW_OUTPUT) == "application/vnd.oci.image.manif
digest=$(jq -r '.Digest' <<< $RAW_OUTPUT)

jq -cr -n --arg architecture "$architecture" --arg os "$os" --arg digest "$digest" \
'{"platform": {"architecture": $ARGS.named["architecture"], "os": $ARGS.named["os"]}, "digest": $ARGS.named["digest"]}'
'{"platform": {"architecture": $ARGS.named["architecture"], "os": $ARGS.named["os"]}, "digest": $ARGS.named["digest"], "multiarch": false}'
elif [ $(jq -r '.mediaType' <<< $RAW_OUTPUT) == "application/vnd.docker.distribution.manifest.v2+json" ] ; then
RAW_OUTPUT=$(skopeo inspect --no-tags docker://${IMAGE})
architecture=$(jq -r '.Architecture // ""' <<< $RAW_OUTPUT)
Expand All @@ -56,8 +56,9 @@ elif [ $(jq -r '.mediaType' <<< $RAW_OUTPUT) == "application/vnd.docker.distribu
# tekton bundles produced by tkn do not set the architecture or OS so
# default to linux/amd64
jq -cr -n --arg architecture "${architecture:-amd64}" --arg os "${os:-linux}" --arg digest "$digest" \
'{"platform": {"architecture": $ARGS.named["architecture"], "os": $ARGS.named["os"]}, "digest": $ARGS.named["digest"]}'
'{"platform": {"architecture": $ARGS.named["architecture"], "os": $ARGS.named["os"]}, "digest": $ARGS.named["digest"], "multiarch": false}'
else
# Multi arch
jq -cr '.manifests[]' <<< $RAW_OUTPUT
manifests=$(jq '.manifests[] += {multiarch: true}' <<< "$RAW_OUTPUT")
jq -cr '.manifests[]' <<< "$manifests"
fi

0 comments on commit 3e107df

Please sign in to comment.