Skip to content

Commit

Permalink
feat: add a new setup.py related heuristic in the pypi malware analyz…
Browse files Browse the repository at this point in the history
…er (#932)

New heuristic that checks for the presence of a wheel (.whl) file distributed with the pypi package, passing when a wheel file is present, and failing when it is absent.

Signed-off-by: Carl Flottmann <[email protected]>
  • Loading branch information
art1f1c3R authored Dec 2, 2024
1 parent 7cfc839 commit d4294d5
Show file tree
Hide file tree
Showing 8 changed files with 336 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/macaron/database/table_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class PackageURLMixin:
name: Mapped[str] = mapped_column(String(100), nullable=False, comment="Name of the package.")

#: Version of the package.
version: Mapped[str] = mapped_column(String(100), nullable=True, comment="Version of the package.")
version: Mapped[str | None] = mapped_column(String(100), nullable=True, comment="Version of the package.")

#: Extra qualifying data for a package such as the name of an OS.
qualifiers: Mapped[str] = mapped_column(
Expand Down
4 changes: 4 additions & 0 deletions src/macaron/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,7 @@ class CycloneDXParserError(MacaronError):

class DependencyAnalyzerError(MacaronError):
    """The error class for DependencyAnalyzer errors."""


class HeuristicAnalyzerValueError(MacaronError):
    """Error class for BaseHeuristicAnalyzer errors when parsing data.

    Heuristic analyzers raise this error when the package information they need is missing
    or malformed, for example when no release information is available for the package.
    """
3 changes: 3 additions & 0 deletions src/macaron/malware_analyzer/pypi_heuristics/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ class Heuristics(str, Enum):
#: Indicates that the setup.py file contains suspicious imports, such as base64 and requests.
SUSPICIOUS_SETUP = "suspicious_setup"

#: Indicates that the package does not include a .whl file.
WHEEL_ABSENCE = "wheel_absence"


class HeuristicResult(str, Enum):
"""Result type indicating the outcome of a heuristic."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""The heuristic analyzer to check .whl file absence."""

import logging

from macaron.errors import HeuristicAnalyzerValueError
from macaron.json_tools import JsonType
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset

logger: logging.Logger = logging.getLogger(__name__)


class WheelAbsenceAnalyzer(BaseHeuristicAnalyzer):
    """Check whether a wheel (.whl) file is distributed with the analyzed package release.

    If the release is distributed with a .whl file, this heuristic passes. Otherwise, the
    heuristic fails.
    """

    #: The ``packagetype`` value that this heuristic treats as a wheel distribution.
    WHEEL: str = "bdist_wheel"

    def __init__(self) -> None:
        super().__init__(
            name="wheel_absence_analyzer",
            heuristic=Heuristics.WHEEL_ABSENCE,
            depends_on=None,
        )

    def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
        """Analyze the package.

        The version taken from the component is used when it is set; otherwise the latest
        version reported for the package is analyzed.

        Parameters
        ----------
        pypi_package_json: PyPIPackageJsonAsset
            The PyPI package JSON asset object.

        Returns
        -------
        tuple[HeuristicResult, dict[str, JsonType]]:
            The result and related information collected during the analysis. The information
            maps the analyzed version to the filenames of all files listed for that release.

        Raises
        ------
        HeuristicAnalyzerValueError
            If there is no release information, there is no most recent version (if queried),
            the version is not among the releases, or the metadata of the release is malformed.
        """
        releases = pypi_package_json.get_releases()
        if releases is None:  # no release information
            error_msg = "There is no information for any release of this package."
            logger.debug(error_msg)
            raise HeuristicAnalyzerValueError(error_msg)

        version = pypi_package_json.component.version
        if version is None:  # no version requested, so check the latest release version
            version = pypi_package_json.get_latest_version()

        if version is None:
            error_msg = "There is no latest version of this package."
            logger.debug(error_msg)
            raise HeuristicAnalyzerValueError(error_msg)

        # Only the lookup itself is guarded here, so that a KeyError coming from a malformed
        # file entry further down is never misreported as a missing version.
        try:
            release_distributions = releases[version]
        except KeyError as error:
            error_msg = f"The version {version} is not available as a release."
            logger.debug(error_msg)
            raise HeuristicAnalyzerValueError(error_msg) from error

        release_files: list[JsonType] = []
        wheel_present: bool = False

        try:
            # Every file is visited (no early exit) because all filenames are reported back.
            for release_metadata in release_distributions:
                if release_metadata["packagetype"] == self.WHEEL:
                    wheel_present = True

                release_files.append(release_metadata["filename"])
        except (KeyError, TypeError) as error:
            # A file entry without the expected keys, an entry that is not a mapping, or a
            # release value that cannot be iterated all mean the package JSON is malformed.
            error_msg = f"The release metadata for version {version} is malformed."
            logger.debug(error_msg)
            raise HeuristicAnalyzerValueError(error_msg) from error

        result = HeuristicResult.PASS if wheel_present else HeuristicResult.FAIL
        return result, {version: release_files}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from macaron.database.db_custom_types import DBJsonDict
from macaron.database.table_definitions import CheckFacts
from macaron.errors import HeuristicAnalyzerValueError
from macaron.json_tools import JsonType, json_extract
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
Expand All @@ -20,6 +21,7 @@
from macaron.malware_analyzer.pypi_heuristics.metadata.one_release import OneReleaseAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release import UnchangedReleaseAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.unreachable_project_links import UnreachableProjectLinksAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence import WheelAbsenceAnalyzer
from macaron.malware_analyzer.pypi_heuristics.sourcecode.suspicious_setup import SuspiciousSetupAnalyzer
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
from macaron.slsa_analyzer.build_tool.pip import Pip
Expand Down Expand Up @@ -70,6 +72,7 @@ class MaliciousMetadataFacts(CheckFacts):
UnchangedReleaseAnalyzer,
CloserReleaseJoinDateAnalyzer,
SuspiciousSetupAnalyzer,
WheelAbsenceAnalyzer,
]

# The HeuristicResult sequence is aligned with the sequence of ANALYZERS list
Expand All @@ -82,6 +85,7 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult,
HeuristicResult,
HeuristicResult,
HeuristicResult,
],
float,
] = {
Expand All @@ -93,9 +97,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.SKIP, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# No project link, only one release, and the maintainer released it shortly
# after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -105,9 +110,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -117,9 +123,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.PASS, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# No project link, frequent releases of multiple versions,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
Expand All @@ -129,8 +136,23 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.PASS, # Suspicious Setup
HeuristicResult.PASS, # Wheel Absence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration.
# and the maintainer released it shortly after account registration. The presence or absence
# of a .whl file has no effect.
): Confidence.MEDIUM,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.PASS, # One Release
HeuristicResult.FAIL, # High Release Frequency
HeuristicResult.FAIL, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.PASS, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# No project link, frequent releases of multiple versions without modifying the content,
# and the maintainer released it shortly after account registration. The presence or absence
# of a .whl file has no effect.
): Confidence.MEDIUM,
(
HeuristicResult.PASS, # Empty Project
Expand All @@ -140,9 +162,10 @@ class MaliciousMetadataFacts(CheckFacts):
HeuristicResult.PASS, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
# All project links are unreachable, frequent releases of multiple versions,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
}

Expand Down Expand Up @@ -197,6 +220,11 @@ def run_heuristics(
-------
tuple[dict[Heuristics, HeuristicResult], dict[str, JsonType]]
Containing the analysis results and relevant metadata.
Raises
------
HeuristicAnalyzerValueError
If a heuristic analysis fails due to malformed package information.
"""
results: dict[Heuristics, HeuristicResult] = {}
detail_info: dict[str, JsonType] = {}
Expand Down Expand Up @@ -277,7 +305,11 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:

# Download the PyPI package JSON, but no need to persist it to the filesystem.
if pypi_package_json.download(dest=""):
result, detail_info = self.run_heuristics(pypi_package_json)
try:
result, detail_info = self.run_heuristics(pypi_package_json)
except HeuristicAnalyzerValueError:
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)

result_combo: tuple = tuple(result.values())
confidence: float | None = SUSPICIOUS_COMBO.get(result_combo, None)
result_type = CheckResultType.FAILED
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

/* The "check-ajax-requester" policy holds for a component when the
   "mcn_detect_malicious_metadata_1" check passed for that component. */
Policy("check-ajax-requester", component_id, "Check ajax-requester artifacts") :-
    check_passed(component_id, "mcn_detect_malicious_metadata_1").

/* Apply the "check-ajax-requester" policy to each component whose PURL satisfies
   match("pkg:pypi/ajax-requester", purl).
   NOTE(review): match presumably treats its first argument as a pattern; confirm. */
apply_policy_to("check-ajax-requester", component_id) :-
    is_component(component_id, purl),
    match("pkg:pypi/ajax-requester", purl).
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

description: |
Analyzing the metadata of a project that has unreachable links and frequent releases, whose
maintainer released it shortly after account registration, and whose setup.py file is suspicious,
but for which a wheel file is present. Macaron should report a pass for such a package.
tags:
- macaron-python-package

steps:
- name: Run macaron analyze against ajax-requester
kind: analyze
options:
command_args:
- -purl
- pkg:pypi/ajax-requester
- name: Run macaron verify-policy to check the results
kind: verify
options:
policy: policy.dl
Loading

0 comments on commit d4294d5

Please sign in to comment.