Skip to content

Commit

Permalink
feat: report known malware for all ecosystems (#922)
Browse files Browse the repository at this point in the history
If a package is already known to be malicious, this PR reports it as part of the mcn_detect_malicious_metadata_1 check. Additionally, two new integration tests for known Python and npm malware have been added.

Signed-off-by: behnazh-w <[email protected]>
  • Loading branch information
behnazh-w authored Nov 22, 2024
1 parent baaff5f commit b5afe0d
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ Macaron checks that report integrity issues but do not map to SLSA requirements
* - Check ID
- Description
* - ``mcn_detect_malicious_metadata_1``
- This check analyzes the metadata of a package and reports malicious behavior. This check currently supports PyPI packages.
- This check performs analysis on PyPI package metadata to detect malicious behavior. It also reports known malware from other ecosystems, but the analysis is currently limited to PyPI packages.

----------------------
How does Macaron work?
Expand Down
4 changes: 3 additions & 1 deletion docs/source/pages/tutorials/detect_malicious_package.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ In this tutorial we show how to use Macaron to find malicious packages. Imagine
:widths: 25
:header-rows: 1

* - Supported packages
* - Supported packages for analysis
* - Python packages (PyPI)

Note that known malware is reported for packages across all ecosystems.

.. contents:: :local:


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

import logging

from sqlalchemy import ForeignKey
import requests
from sqlalchemy import ForeignKey, String
from sqlalchemy.orm import Mapped, mapped_column

from macaron.database.db_custom_types import DBJsonDict
from macaron.database.table_definitions import CheckFacts
from macaron.json_tools import JsonType
from macaron.json_tools import JsonType, json_extract
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
from macaron.malware_analyzer.pypi_heuristics.metadata.closer_release_join_date import CloserReleaseJoinDateAnalyzer
Expand All @@ -28,6 +29,7 @@
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
from macaron.slsa_analyzer.registry import registry
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
from macaron.util import send_post_http_raw

logger: logging.Logger = logging.getLogger(__name__)

Expand All @@ -40,10 +42,15 @@ class MaliciousMetadataFacts(CheckFacts):
#: The primary key.
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003

#: Known malware.
known_malware: Mapped[str | None] = mapped_column(
String, nullable=True, info={"justification": JustificationType.HREF}
)

#: Detailed information about the analysis.
detail_information: Mapped[dict[str, JsonType]] = mapped_column(DBJsonDict, nullable=False)

#: The result of analysis, which is of dict[Heuristics, HeuristicResult] type.
#: The result of analysis, which can be an empty dictionary.
result: Mapped[dict[Heuristics, HeuristicResult]] = mapped_column(
DBJsonDict, nullable=False, info={"justification": JustificationType.TEXT}
)
Expand Down Expand Up @@ -223,14 +230,43 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
CheckResultData
The result of the check.
"""
result_tables: list[CheckFacts] = []
# First, check whether this package is already reported as known malware.

url = "https://api.osv.dev/v1/query"
data = {"package": {"purl": ctx.component.purl}}
response = send_post_http_raw(url, json_data=data, headers=None)
res_obj = None
if response:
try:
res_obj = response.json()
except requests.exceptions.JSONDecodeError as error:
logger.debug("Unable to get a valid response from %s: %s", url, error)
if res_obj:
for vuln in res_obj.get("vulns", {}):
v_id = json_extract(vuln, ["id"], str)
if v_id and v_id.startswith("MAL-"):
result_tables.append(
MaliciousMetadataFacts(
known_malware=f"https://osv.dev/vulnerability/{v_id}",
result={},
detail_information=vuln,
confidence=Confidence.HIGH,
)
)
if result_tables:
return CheckResultData(
result_tables=result_tables,
result_type=CheckResultType.FAILED,
)

package_registry_info_entries = ctx.dynamic_data["package_registries"]
for package_registry_info_entry in package_registry_info_entries:
match package_registry_info_entry:
case PackageRegistryInfo(
build_tool=Pip() | Poetry(),
package_registry=PyPIRegistry() as pypi_registry,
) as pypi_registry_info:
result_tables: list[CheckFacts] = []

# Create an AssetLocator object for the PyPI package JSON object.
pypi_package_json = PyPIPackageJsonAsset(
Expand Down
74 changes: 74 additions & 0 deletions src/macaron/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,80 @@ def send_get_http_raw(
return response


def send_post_http_raw(
    url: str,
    json_data: dict | None = None,
    headers: dict | None = None,
    timeout: int | None = None,
    allow_redirects: bool = True,
) -> Response | None:
    """Send a POST HTTP request with the given url, data, and headers.

    This method also handles logging when the API server returns an error status code.
    A 403 response triggers ``check_rate_limit`` followed by a retry, up to the
    configured number of retries; any other error status is not retried.

    Parameters
    ----------
    url : str
        The url of the request.
    json_data: dict | None
        The request payload, sent as the JSON body.
    headers : dict | None
        The dict that describes the headers of the request.
    timeout: int | None
        The request timeout (optional). When not provided (or zero), the value of
        ``timeout`` in the ``requests`` section of the defaults is used (fallback: 10).
    allow_redirects: bool
        Whether to allow redirects. Default: True.

    Returns
    -------
    Response | None
        If a Response object is returned and ``allow_redirects`` is ``True`` (the default) it will have a status code of
        200 (OK). If ``allow_redirects`` is ``False`` the response can instead have a status code of 302. Otherwise, the
        request has failed and ``None`` will be returned.
    """
    logger.debug("POST - %s", url)
    if not timeout:
        timeout = defaults.getint("requests", "timeout", fallback=10)
    error_retries = defaults.getint("requests", "error_retries", fallback=5)
    retry_counter = error_retries

    while True:
        # Every attempt, including retries, is guarded so that a connection
        # error or timeout results in ``None`` rather than an exception.
        try:
            response = requests.post(
                url=url,
                json=json_data,
                headers=headers,
                timeout=timeout,
                allow_redirects=allow_redirects,
            )
        except requests.exceptions.RequestException as error:
            logger.debug(error)
            return None

        if response.status_code == 200:
            return response

        if not allow_redirects and response.status_code == 302:
            # Found, most likely because a redirect is about to happen.
            return response

        logger.debug(
            "Receiving error code %s from server.",
            response.status_code,
        )
        if retry_counter <= 0:
            logger.debug("Maximum retries reached: %s", error_retries)
            return None

        # Only a 403 is treated as retryable; any other error status is final.
        if response.status_code != 403:
            return None

        check_rate_limit(response)
        retry_counter -= 1


def check_rate_limit(response: Response) -> None:
"""Check the remaining calls limit to GitHub API and wait accordingly.
Expand Down
10 changes: 10 additions & 0 deletions tests/integration/cases/tautoak4-hello-world/policy.dl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

/* Derive the "check-malicious-package" policy for a component when the
   mcn_detect_malicious_metadata_1 check is recorded as failed for it. */
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
check_failed(component_id, "mcn_detect_malicious_metadata_1").

/* Apply the policy only to the component identified by the npm PURL below. */
apply_policy_to("check-malicious-package", component_id) :-
is_component(component_id, "pkg:npm/tautoak4-hello-world").
21 changes: 21 additions & 0 deletions tests/integration/cases/tautoak4-hello-world/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

# Integration test case: analyze the npm package pkg:npm/tautoak4-hello-world
# and then verify the result against policy.dl.
description: |
Analyzing a known malicious package.
tags:
- macaron-python-package
- macaron-docker-image

# Step 1 runs the analysis for the PURL; step 2 runs policy verification with policy.dl.
steps:
- name: Run macaron analyze
kind: analyze
options:
command_args:
- -purl
- pkg:npm/tautoak4-hello-world
- name: Run macaron verify-policy to verify that the malicious metadata check fails.
kind: verify
options:
policy: policy.dl
10 changes: 10 additions & 0 deletions tests/integration/cases/type-extension/policy.dl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */

#include "prelude.dl"

/* Derive the "check-malicious-package" policy for a component when the
   mcn_detect_malicious_metadata_1 check is recorded as failed for it. */
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
check_failed(component_id, "mcn_detect_malicious_metadata_1").

/* Apply the policy only to the component identified by the PyPI PURL below. */
apply_policy_to("check-malicious-package", component_id) :-
is_component(component_id, "pkg:pypi/type-extension").
21 changes: 21 additions & 0 deletions tests/integration/cases/type-extension/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

# Integration test case: analyze the PyPI package pkg:pypi/type-extension
# and then verify the result against policy.dl.
description: |
Analyzing a known malicious package.
tags:
- macaron-python-package
- macaron-docker-image

# Step 1 runs the analysis for the PURL; step 2 runs policy verification with policy.dl.
steps:
- name: Run macaron analyze
kind: analyze
options:
command_args:
- -purl
- pkg:pypi/type-extension
- name: Run macaron verify-policy to verify that the malicious metadata check fails.
kind: verify
options:
policy: policy.dl

0 comments on commit b5afe0d

Please sign in to comment.