diff --git a/src/macaron/database/table_definitions.py b/src/macaron/database/table_definitions.py index 61c90da2e..035df8f31 100644 --- a/src/macaron/database/table_definitions.py +++ b/src/macaron/database/table_definitions.py @@ -103,7 +103,7 @@ class PackageURLMixin: name: Mapped[str] = mapped_column(String(100), nullable=False, comment="Name of the package.") #: Version of the package. - version: Mapped[str] = mapped_column(String(100), nullable=True, comment="Version of the package.") + version: Mapped[str | None] = mapped_column(String(100), nullable=True, comment="Version of the package.") #: Extra qualifying data for a package such as the name of an OS. qualifiers: Mapped[str] = mapped_column( diff --git a/src/macaron/errors.py b/src/macaron/errors.py index a3178e8da..b4e8b813f 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -86,3 +86,7 @@ class CycloneDXParserError(MacaronError): class DependencyAnalyzerError(MacaronError): """The DependencyAnalyzer error class.""" + + +class HeuristicAnalyzerValueError(MacaronError): + """Raised by a heuristic analyzer when the package data it needs is missing or malformed.""" diff --git a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py index 0bd74d343..d3e574027 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py @@ -31,6 +31,9 @@ class Heuristics(str, Enum): #: Indicates that the setup.py file contains suspicious imports, such as base64 and requests. 
SUSPICIOUS_SETUP = "suspicious_setup"
 
+    #: Indicates that the package does not include a .whl file
+    WHEEL_ABSENCE = "wheel_absence"
+
 
 class HeuristicResult(str, Enum):
     """Result type indicating the outcome of a heuristic."""
diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py
new file mode 100644
index 000000000..618f26852
--- /dev/null
+++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""The heuristic analyzer to check .whl file absence."""
+
+import logging
+
+from macaron.errors import HeuristicAnalyzerValueError
+from macaron.json_tools import JsonType
+from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
+from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
+from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class WheelAbsenceAnalyzer(BaseHeuristicAnalyzer):
+    """
+    Analyze to see if a .whl file is available for the package.
+
+    If a package is distributed with a .whl file, this heuristic passes. Otherwise, the
+    heuristic fails.
+    """
+
+    #: The ``packagetype`` value that marks a release file as a wheel (.whl) distribution.
+    WHEEL: str = "bdist_wheel"
+
+    def __init__(self) -> None:
+        super().__init__(
+            name="wheel_absence_analyzer",
+            heuristic=Heuristics.WHEEL_ABSENCE,
+            depends_on=None,
+        )
+
+    def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
+        """Analyze the package.
+
+        The version set on the analyzed component is checked when there is one; otherwise the
+        latest version returned by the package JSON asset is used.
+
+        Parameters
+        ----------
+        pypi_package_json: PyPIPackageJsonAsset
+            The PyPI package JSON asset object.
+
+        Returns
+        -------
+        tuple[HeuristicResult, dict[str, JsonType]]:
+            ``PASS`` if the release contains a wheel, ``FAIL`` otherwise, together with a mapping
+            from the analyzed version to the filenames of its release files.
+
+        Raises
+        ------
+        HeuristicAnalyzerValueError
+            If there is no release information, if no version can be determined, if the version
+            is not among the releases, or if a release file entry lacks an expected key.
+        """
+        releases = pypi_package_json.get_releases()
+        if releases is None:  # no release information
+            error_msg = "There is no information for any release of this package."
+            logger.debug(error_msg)
+            raise HeuristicAnalyzerValueError(error_msg)
+
+        version = pypi_package_json.component.version
+        if version is None:  # check latest release version
+            version = pypi_package_json.get_latest_version()
+
+        if version is None:
+            error_msg = "There is no latest version of this package."
+            logger.debug(error_msg)
+            raise HeuristicAnalyzerValueError(error_msg)
+
+        # Look the version up on its own so that a malformed file entry below is not
+        # misreported as a missing release.
+        try:
+            release_distributions = releases[version]
+        except KeyError as error:
+            error_msg = f"The version {version} is not available as a release."
+            logger.debug(error_msg)
+            raise HeuristicAnalyzerValueError(error_msg) from error
+
+        release_files: list[JsonType] = []
+        wheel_present: bool = False
+
+        try:
+            for release_metadata in release_distributions:
+                if release_metadata["packagetype"] == self.WHEEL:
+                    wheel_present = True
+
+                release_files.append(release_metadata["filename"])
+        except KeyError as error:
+            error_msg = f"A release file of version {version} is missing the {error} key."
+            logger.debug(error_msg)
+            raise HeuristicAnalyzerValueError(error_msg) from error
+
+        result = HeuristicResult.PASS if wheel_present else HeuristicResult.FAIL
+        return result, {version: release_files}
diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index 13522e38f..15daf8d65 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -11,6 +11,7 @@
 
 from macaron.database.db_custom_types import DBJsonDict
 from macaron.database.table_definitions import CheckFacts
+from macaron.errors import HeuristicAnalyzerValueError
 from macaron.json_tools import JsonType, json_extract
 from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
@@ -20,6 +21,7 @@
 from macaron.malware_analyzer.pypi_heuristics.metadata.one_release import OneReleaseAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release import UnchangedReleaseAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.metadata.unreachable_project_links import UnreachableProjectLinksAnalyzer
+from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence import WheelAbsenceAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.sourcecode.suspicious_setup import SuspiciousSetupAnalyzer
 from macaron.slsa_analyzer.analyze_context import AnalyzeContext
 from macaron.slsa_analyzer.build_tool.pip import Pip
@@ -70,6 +72,7 @@ class MaliciousMetadataFacts(CheckFacts):
     UnchangedReleaseAnalyzer,
     CloserReleaseJoinDateAnalyzer,
     SuspiciousSetupAnalyzer,
+    WheelAbsenceAnalyzer,
 ]
 
 # The HeuristicResult sequence is aligned with the sequence of ANALYZERS list
@@ -82,6 +85,7 @@ class MaliciousMetadataFacts(CheckFacts):
         HeuristicResult,
         HeuristicResult,
         HeuristicResult,
+
HeuristicResult, ], float, ] = { @@ -93,9 +97,10 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.SKIP, # Unchanged Release HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.FAIL, # Suspicious Setup + HeuristicResult.FAIL, # Wheel Absence # No project link, only one release, and the maintainer released it shortly # after account registration. - # The setup.py file contains suspicious imports. + # The setup.py file contains suspicious imports and .whl file isn't present. ): Confidence.HIGH, ( HeuristicResult.FAIL, # Empty Project @@ -105,9 +110,10 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.FAIL, # Unchanged Release HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.FAIL, # Suspicious Setup + HeuristicResult.FAIL, # Wheel Absence # No project link, frequent releases of multiple versions without modifying the content, # and the maintainer released it shortly after account registration. - # The setup.py file contains suspicious imports. + # The setup.py file contains suspicious imports and .whl file isn't present. ): Confidence.HIGH, ( HeuristicResult.FAIL, # Empty Project @@ -117,9 +123,10 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.PASS, # Unchanged Release HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.FAIL, # Suspicious Setup + HeuristicResult.FAIL, # Wheel Absence # No project link, frequent releases of multiple versions, # and the maintainer released it shortly after account registration. - # The setup.py file contains suspicious imports. + # The setup.py file contains suspicious imports and .whl file isn't present. 
): Confidence.HIGH, ( HeuristicResult.FAIL, # Empty Project @@ -129,8 +136,23 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.FAIL, # Unchanged Release HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.PASS, # Suspicious Setup + HeuristicResult.PASS, # Wheel Absence # No project link, frequent releases of multiple versions without modifying the content, - # and the maintainer released it shortly after account registration. + # and the maintainer released it shortly after account registration. Presence/Absence of + # .whl file has no effect + ): Confidence.MEDIUM, + ( + HeuristicResult.FAIL, # Empty Project + HeuristicResult.SKIP, # Unreachable Project Links + HeuristicResult.PASS, # One Release + HeuristicResult.FAIL, # High Release Frequency + HeuristicResult.FAIL, # Unchanged Release + HeuristicResult.FAIL, # Closer Release Join Date + HeuristicResult.PASS, # Suspicious Setup + HeuristicResult.FAIL, # Wheel Absence + # No project link, frequent releases of multiple versions without modifying the content, + # and the maintainer released it shortly after account registration. Presence/Absence of + # .whl file has no effect ): Confidence.MEDIUM, ( HeuristicResult.PASS, # Empty Project @@ -140,9 +162,10 @@ class MaliciousMetadataFacts(CheckFacts): HeuristicResult.PASS, # Unchanged Release HeuristicResult.FAIL, # Closer Release Join Date HeuristicResult.FAIL, # Suspicious Setup + HeuristicResult.FAIL, # Wheel Absence # All project links are unreachable, frequent releases of multiple versions, # and the maintainer released it shortly after account registration. - # The setup.py file contains suspicious imports. + # The setup.py file contains suspicious imports and .whl file isn't present. ): Confidence.HIGH, } @@ -197,6 +220,11 @@ def run_heuristics( ------- tuple[dict[Heuristics, HeuristicResult], dict[str, JsonType]] Containing the analysis results and relevant metadata. 
+ + Raises + ------ + HeuristicAnalyzerValueError + If a heuristic analysis fails due to malformed package information. """ results: dict[Heuristics, HeuristicResult] = {} detail_info: dict[str, JsonType] = {} @@ -277,7 +305,11 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: # Download the PyPI package JSON, but no need to persist it to the filesystem. if pypi_package_json.download(dest=""): - result, detail_info = self.run_heuristics(pypi_package_json) + try: + result, detail_info = self.run_heuristics(pypi_package_json) + except HeuristicAnalyzerValueError: + return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN) + result_combo: tuple = tuple(result.values()) confidence: float | None = SUSPICIOUS_COMBO.get(result_combo, None) result_type = CheckResultType.FAILED diff --git a/tests/integration/cases/ajax-requester_pypi_malware_analyzer/policy.dl b/tests/integration/cases/ajax-requester_pypi_malware_analyzer/policy.dl new file mode 100644 index 000000000..1c239798a --- /dev/null +++ b/tests/integration/cases/ajax-requester_pypi_malware_analyzer/policy.dl @@ -0,0 +1,11 @@ +/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("check-ajax-requester", component_id, "Check ajax-requester artifacts") :- /* Holds when the mcn_detect_malicious_metadata_1 check passed for the component. */ + check_passed(component_id, "mcn_detect_malicious_metadata_1"). + +apply_policy_to("check-ajax-requester", component_id) :- /* Applies the policy to components whose PURL matches pkg:pypi/ajax-requester. */ + is_component(component_id, purl), + match("pkg:pypi/ajax-requester", purl). 
diff --git a/tests/integration/cases/ajax-requester_pypi_malware_analyzer/test.yaml b/tests/integration/cases/ajax-requester_pypi_malware_analyzer/test.yaml
new file mode 100644
index 000000000..3e6783fd1
--- /dev/null
+++ b/tests/integration/cases/ajax-requester_pypi_malware_analyzer/test.yaml
@@ -0,0 +1,22 @@
+# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+description: |
+  Analyzing the metadata of a project with unreachable links, frequent releases, and the maintainer
+  released it shortly after account registration with a suspicious setup.py file, but a wheel file
+  is present. Macaron should report a pass for such a package.
+
+tags:
+- macaron-python-package
+
+steps:
+- name: Run macaron analyze against ajax-requester
+  kind: analyze
+  options:
+    command_args:
+    - -purl
+    - pkg:pypi/ajax-requester
+- name: Run macaron verify-policy to check the results
+  kind: verify
+  options:
+    policy: policy.dl
diff --git a/tests/malware_analyzer/pypi/test_wheel_absence.py b/tests/malware_analyzer/pypi/test_wheel_absence.py
new file mode 100644
index 000000000..718417927
--- /dev/null
+++ b/tests/malware_analyzer/pypi/test_wheel_absence.py
@@ -0,0 +1,124 @@
+# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""Tests for heuristic detecting wheel (.whl) file absence from PyPI packages."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from macaron.errors import HeuristicAnalyzerValueError
+from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
+from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence import WheelAbsenceAnalyzer
+
+VERSION = "0.1.0"
+FILE_PREFIX = "ttttttttest_nester.py-0.1.0"
+SDIST_FILE = f"{FILE_PREFIX}.tar.gz"
+WHEEL_FILE = f"{FILE_PREFIX}.whl"
+
+
+def _release_file(filename: str, packagetype: str, python_version: str) -> dict:
+    """Build the PyPI release metadata of one distribution file."""
+    return {
+        "comment_text": "",
+        "digests": {
+            "blake2b_256": "defa2fbcebaeeb909511139ce28dac4a77ab2452ba72b49a22b12981b2f375b3",
+            "md5": "9203bbb130f8ddb38269f4861c170d04",
+            "sha256": "168bcccbf5106132e90b85659297700194369b8f6b3e5a03769614f0d200e370",
+        },
+        "downloads": -1,
+        "filename": filename,
+        "has_sig": False,
+        "md5_digest": "9203bbb130f8ddb38269f4861c170d04",
+        "packagetype": packagetype,
+        "python_version": python_version,
+        "requires_python": None,
+        "size": 546,
+        "upload_time": "2016-10-13T05:42:27",
+        "upload_time_iso_8601": "2016-10-13T05:42:27.073842Z",
+        # Adjacent literals are concatenated, so no stray whitespace ends up inside the URL.
+        "url": (
+            "https://files.pythonhosted.org/packages/de/fa/2fbcebaeeb909511139ce28d"
+            f"ac4a77ab2452ba72b49a22b12981b2f375b3/{filename}"
+        ),
+        "yanked": False,
+        "yanked_reason": None,
+    }
+
+
+def test_analyze_no_information(pypi_package_json: MagicMock) -> None:
+    """Test for when there is no release information, so error."""
+    analyzer = WheelAbsenceAnalyzer()
+
+    pypi_package_json.get_releases.return_value = None
+
+    with pytest.raises(HeuristicAnalyzerValueError):
+        analyzer.analyze(pypi_package_json)
+
+
+def test_analyze_no_latest_version(pypi_package_json: MagicMock) -> None:
+    """Test for when no version is given and there is no latest version, so error."""
+    analyzer = WheelAbsenceAnalyzer()
+
+    pypi_package_json.get_releases.return_value = {VERSION: [_release_file(WHEEL_FILE, "bdist_wheel", "py2.py3")]}
+    pypi_package_json.component.version = None
+    pypi_package_json.get_latest_version.return_value = None
+
+    with pytest.raises(HeuristicAnalyzerValueError):
+        analyzer.analyze(pypi_package_json)
+
+
+def test_analyze_version_not_released(pypi_package_json: MagicMock) -> None:
+    """Test for when the requested version is not among the releases, so error."""
+    analyzer = WheelAbsenceAnalyzer()
+
+    pypi_package_json.get_releases.return_value = {VERSION: [_release_file(WHEEL_FILE, "bdist_wheel", "py2.py3")]}
+    pypi_package_json.component.version = "9.9.9"
+
+    with pytest.raises(HeuristicAnalyzerValueError):
+        analyzer.analyze(pypi_package_json)
+
+
+def test_analyze_tar_present(pypi_package_json: MagicMock) -> None:
+    """Test for when only .tar.gz is present, so failed."""
+    analyzer = WheelAbsenceAnalyzer()
+
+    pypi_package_json.get_releases.return_value = {VERSION: [_release_file(SDIST_FILE, "sdist", "source")]}
+    pypi_package_json.get_latest_version.return_value = VERSION
+    pypi_package_json.component.version = None
+    expected_result: tuple[HeuristicResult, dict] = (HeuristicResult.FAIL, {VERSION: [SDIST_FILE]})
+
+    actual_result = analyzer.analyze(pypi_package_json)
+
+    assert actual_result == expected_result
+
+
+def test_analyze_whl_present(pypi_package_json: MagicMock) -> None:
+    """Test for when only .whl is present, so pass."""
+    analyzer = WheelAbsenceAnalyzer()
+
+    pypi_package_json.get_releases.return_value = {VERSION: [_release_file(WHEEL_FILE, "bdist_wheel", "py2.py3")]}
+    pypi_package_json.component.version = VERSION
+    expected_result: tuple[HeuristicResult, dict] = (HeuristicResult.PASS, {VERSION: [WHEEL_FILE]})
+
+    actual_result = analyzer.analyze(pypi_package_json)
+
+    assert actual_result == expected_result
+
+
+def test_analyze_both_present(pypi_package_json: MagicMock) -> None:
+    """Test for when both .tar.gz and .whl are present, so passed."""
+    analyzer = WheelAbsenceAnalyzer()
+
+    pypi_package_json.get_releases.return_value = {
+        VERSION: [
+            _release_file(WHEEL_FILE, "bdist_wheel", "py2.py3"),
+            _release_file(SDIST_FILE, "sdist", "source"),
+        ]
+    }
+    pypi_package_json.component.version = VERSION
+    expected_result: tuple[HeuristicResult, dict] = (HeuristicResult.PASS, {VERSION: [WHEEL_FILE, SDIST_FILE]})
+
+    actual_result = analyzer.analyze(pypi_package_json)
+
+    assert actual_result == expected_result