Skip to content

Commit

Permalink
chore: validate input repo, commit, provenance to ensure they match (#…
Browse files Browse the repository at this point in the history
…739)

Signed-off-by: Ben Selwyn-Smith <[email protected]>
  • Loading branch information
benmss authored Jun 4, 2024
1 parent 44ad0e1 commit da182aa
Show file tree
Hide file tree
Showing 20 changed files with 329 additions and 140 deletions.
14 changes: 11 additions & 3 deletions scripts/dev_scripts/integration_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/sl
EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue
DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/slsa_verifier.ini
PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl
$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail
$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -d 6fb4f7e2dd9c2f5d4f55fa88f6796278a7bba6d6 --skip-deps || log_fail

check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail

Expand All @@ -719,7 +719,7 @@ JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/sl
EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue
DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/allow_url_link_github.ini
PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl
$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail
$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -d 6fb4f7e2dd9c2f5d4f55fa88f6796278a7bba6d6 --skip-deps || log_fail

check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail

Expand Down Expand Up @@ -762,7 +762,7 @@ check_or_update_expected_output $COMPARE_POLICIES $POLICY_RESULT $POLICY_EXPECTE

echo -e "\n----------------------------------------------------------------------------------"
echo "behnazh-w/example-maven-app as a local and remote repository"
echo "Test the Witness and GitHub provenances as an input, Cue expectation validation, Policy CLI and VSA generation."
echo "Test the Witness and GitHub provenances as an input, Cue expectation validation, Policy CLI and VSA generation, User input vs. provenance."
echo -e "----------------------------------------------------------------------------------\n"
RUN_POLICY="macaron verify-policy"
POLICY_FILE=$WORKSPACE/tests/policy_engine/resources/policies/example-maven-project/policy.dl
Expand Down Expand Up @@ -794,6 +794,14 @@ $RUN_POLICY -f $POLICY_FILE -d "$WORKSPACE/output/macaron.db" || log_fail
check_or_update_expected_output "$COMPARE_POLICIES" "$POLICY_RESULT" "$POLICY_EXPECTED" || log_fail
check_or_update_expected_output "$COMPARE_VSA" "$VSA_RESULT" "$VSA_PAYLOAD_EXPECTED" || log_fail

# Validate user input of repo and commit vs provenance.
$RUN_MACARON analyze -pf $GITHUB_PROVENANCE_FILE -rp https://github.com/behnazh-w/example-maven-app -d 2deca75ed5dd365eaf1558a82347b1f11306135f --skip-deps || log_fail

# Validate user input of repo and commit (via purl) vs provenance.
$RUN_MACARON analyze -pf $GITHUB_PROVENANCE_FILE -purl pkg:github/behnazh-w/example-maven-app@2deca75 --skip-deps || log_fail

# Validate user input of repo and commit (via purl with tag) vs provenance.
$RUN_MACARON analyze -pf $GITHUB_PROVENANCE_FILE -purl pkg:github/behnazh-w/[email protected] --skip-deps || log_fail

# Testing the Repo Finder's remote calls.
# This requires the 'packageurl' Python module
Expand Down
41 changes: 25 additions & 16 deletions src/macaron/json_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,26 @@

"""This module provides utility functions for JSON data."""
import logging
from collections.abc import Sequence
from typing import TypeVar

from macaron.util import JsonType

JsonType = int | float | str | None | bool | list["JsonType"] | dict[str, "JsonType"]
T = TypeVar("T", bound=JsonType)

logger: logging.Logger = logging.getLogger(__name__)


def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T | None:
def json_extract(entry: dict | list, keys: Sequence[str | int], type_: type[T]) -> T | None:
"""Return the value found by following the list of depth-sequential keys inside the passed JSON dictionary.
The value must be of the passed type.
Parameters
----------
entry: JsonType
entry: dict | list
An entry point into a JSON structure.
keys: list[str]
The list of depth-sequential keys within the JSON.
keys: Sequence[str | int]
The sequence of depth-sequential keys within the JSON. Can be dict keys or list indices.
type: type[T]
The type to check the value against and return it as.
Expand All @@ -31,19 +31,28 @@ def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T | None:
T | None:
The found value as the type of the type parameter.
"""
target = entry

for index, key in enumerate(keys):
if not isinstance(target, dict):
logger.debug("Expect the value .%s to be a dict.", ".".join(keys[:index]))
target: JsonType = entry
for key in keys:
if isinstance(target, dict) and isinstance(key, str):
if key not in target:
logger.debug("JSON key '%s' not found in dict target.", key)
return None
elif isinstance(target, list) and isinstance(key, int):
if key < 0 or key >= len(target):
logger.debug("JSON list index '%s' is outside of list bounds %s.", key, len(target))
return None
else:
logger.debug("Cannot index '%s' (type: %s) in target (type: %s).", key, type(key), type(target))
return None
if key not in target:
logger.debug("JSON key '%s' not found in .%s", key, ".".join(keys[:index]))
return None
target = target[key]

# If statement required for mypy to not complain. The else case can never happen because of the above if block.
if isinstance(target, dict) and isinstance(key, str):
target = target[key]
elif isinstance(target, list) and isinstance(key, int):
target = target[key]

if isinstance(target, type_):
return target

logger.debug("Expect the value .%s to be of type %s", ".".join(keys), type_)
logger.debug("Found value of incorrect type: %s instead of %s.", type(target), type(type_))
return None
2 changes: 1 addition & 1 deletion src/macaron/repo_finder/commit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def extract_commit_from_version(git_obj: Git, version: str) -> str | None:
if 7 <= len(version) <= 40 and re.match(hex_only_pattern, version):
try:
commit = git_obj.get_commit(version)
except BadName as error:
except (BadName, ValueError) as error:
logger.debug("Failed to retrieve commit: %s", error)

if not commit:
Expand Down
155 changes: 143 additions & 12 deletions src/macaron/repo_finder/provenance_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,20 @@

"""This module contains methods for extracting repository and commit metadata from provenance files."""
import logging
import urllib.parse

from packageurl import PackageURL
from pydriller import Git

from macaron.errors import ProvenanceError
from macaron.json_tools import json_extract
from macaron.json_tools import JsonType, json_extract
from macaron.repo_finder.commit_finder import (
AbstractPurlType,
determine_abstract_purl_type,
extract_commit_from_version,
)
from macaron.repo_finder.repo_finder import to_domain_from_known_purl_types
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload
from macaron.util import JsonType

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,16 +76,8 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str | None, str |
if not list_index:
return None, None

material_list = json_extract(predicate, ["materials"], list)
if not material_list:
return None, None

if list_index >= len(material_list):
logger.debug("Material list index outside of material list bounds.")
return None, None

material = material_list[list_index]
if not material or not isinstance(material, dict):
material = json_extract(predicate, ["materials", list_index], dict)
if not material:
logger.debug("Indexed material list entry is invalid.")
return None, None

Expand Down Expand Up @@ -232,3 +233,133 @@ def _clean_spdx(uri: str) -> str:
"""
url, _, _ = uri.lstrip("git+").rpartition("@")
return url


def check_if_input_repo_commit_provenance_conflict(
repo_path_input: str | None,
digest_input: str | None,
provenance_repo_url: str | None,
provenance_commit_digest: str | None,
) -> bool:
"""Test if the input repo and commit match the contents of the provenance.
Parameters
----------
repo_path_input: str | None
The repo URL from input.
digest_input: str | None
The digest from input.
provenance_repo_url: str | None
The repo URL from provenance.
provenance_commit_digest: str | None
The commit digest from provenance.
Returns
-------
bool
True if there is a conflict between the inputs, False otherwise, or if the comparison cannot be performed.
"""
# Check the provenance repo against the input repo.
if repo_path_input and provenance_repo_url and repo_path_input != provenance_repo_url:
logger.debug(
"The repository URL from input does not match what exists in the provenance. "
"Input Repo: %s, Provenance Repo: %s.",
repo_path_input,
provenance_repo_url,
)
return True

# Check the provenance commit against the input commit.
if digest_input and provenance_commit_digest and digest_input != provenance_commit_digest:
logger.debug(
"The commit digest from input does not match what exists in the provenance. "
"Input Commit: %s, Provenance Commit: %s.",
digest_input,
provenance_commit_digest,
)
return True

return False


def check_if_input_purl_provenance_conflict(
    git_obj: Git,
    repo_path_input: bool,
    digest_input: bool,
    provenance_repo_url: str | None,
    provenance_commit_digest: str | None,
    purl: PackageURL,
) -> bool:
    """Report whether a repository type PURL disagrees with the repo or commit found in the provenance.

    PURLs that are not of the repository abstract type are never treated as conflicting. The PURL's
    repo is only compared when no repo was passed as input, and the PURL's version is only compared
    when no commit was passed as input.

    Parameters
    ----------
    git_obj: Git
        The Git object.
    repo_path_input: bool
        True if there is a repo as input.
    digest_input: bool
        True if there is a commit as input.
    provenance_repo_url: str | None
        The repo url from provenance.
    provenance_commit_digest: str | None
        The commit digest from provenance.
    purl: PackageURL
        The input repository PURL.

    Returns
    -------
    bool
        True if there is a conflict between the inputs, False otherwise, or if the comparison cannot be performed.
    """
    if determine_abstract_purl_type(purl) != AbstractPurlType.REPOSITORY:
        return False

    # Repository comparison: skipped when a repo was given as input or the provenance has none.
    if provenance_repo_url and not repo_path_input:
        repos_match = check_if_repository_purl_and_url_match(provenance_repo_url, purl)
        if not repos_match:
            logger.debug(
                "The repo url passed via purl input does not match what exists in the provenance. "
                "Purl: %s, Provenance: %s.",
                purl,
                provenance_repo_url,
            )
            return True

    # Commit comparison: needs a provenance digest and a PURL version, and no commit given as input.
    if digest_input or not provenance_commit_digest or not purl.version:
        return False

    purl_commit = extract_commit_from_version(git_obj, purl.version)
    # A version that yields no commit cannot be compared, so it is not reported as a conflict.
    if not purl_commit or purl_commit == provenance_commit_digest:
        return False

    logger.debug(
        "The commit digest passed via purl input does not match what exists in the "
        "provenance. Purl Commit: %s, Provenance Commit: %s.",
        purl_commit,
        provenance_commit_digest,
    )
    return True


def check_if_repository_purl_and_url_match(url: str, repo_purl: PackageURL) -> bool:
    """Compare a repository PURL and URL for equality, ignoring letter case.

    Parameters
    ----------
    url: str
        The URL.
    repo_purl: PackageURL
        A PURL that is of the repository abstract type. E.g. GitHub.

    Returns
    -------
    bool
        True if the two inputs match in terms of URL netloc/domain and path.
    """
    expanded_purl_type = to_domain_from_known_purl_types(repo_purl.type)
    parsed_url = urllib.parse.urlparse(url)

    purl_path = repo_purl.name
    if repo_purl.namespace:
        purl_path = f"{repo_purl.namespace}/{purl_path}"

    # Note that the urllib method includes the "/" before path while the PURL method does not.
    url_location = f"{parsed_url.hostname}{parsed_url.path}"
    # Fall back to the raw PURL type when no domain is returned for it.
    purl_location = f"{expanded_purl_type or repo_purl.type}/{purl_path}"

    # Lower-case both sides: normalising only the URL would make any PURL containing an
    # upper-case character impossible to match.
    return url_location.lower() == purl_location.lower()
2 changes: 1 addition & 1 deletion src/macaron/repo_finder/repo_finder_deps_dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from packageurl import PackageURL

from macaron.repo_finder.provenance_extractor import json_extract
from macaron.json_tools import json_extract
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
from macaron.repo_finder.repo_validator import find_valid_repository_url
from macaron.util import send_get_http_raw
Expand Down
Loading

0 comments on commit da182aa

Please sign in to comment.