@contextlib.contextmanager
def with_cursor(self):
    """Context manager that opens the database and hands out a cursor.

    Yields:
        The cursor returned by ``self.db_open_and_get_cursor()``.

    ``self.db_close()`` is always called when the ``with`` body finishes,
    whether it completed normally or raised.
    """
    db_cursor = self.db_open_and_get_cursor()
    try:
        yield db_cursor
    finally:
        # Runs on both the success and the error path.
        self.db_close()
@dataclasses.dataclass
class EnvNamespaceConfig:
    """Settings for one ad-hoc CVE declared in a ``.env`` file.

    Each attribute is filled from a ``CVE_BIN_TOOL_<NAMESPACE>_<FIELD>`` line,
    where ``<FIELD>`` is the upper-cased attribute name.
    """

    # Identifier stored as the CVE number of the ad-hoc finding.
    ad_hoc_cve_id: str
    vendor: str
    product: str
    version: str
    # Passed to ProductInfo as the location; the default is a placeholder.
    location: str = "/usr/local/bin/product"


@dataclasses.dataclass
class EnvConfig:
    """All namespaces parsed from a single ``.env`` file."""

    # Maps the namespace token taken from the variable name to its settings.
    namespaces: dict[str, EnvNamespaceConfig]


class EnvParser(Parser):
    """
    Parser for ``.env`` files that declare ad-hoc CVEs.

    Lines of the form ``CVE_BIN_TOOL_<NAMESPACE>_<FIELD>="<value>"`` are
    grouped by ``<NAMESPACE>``. Each namespace becomes one ad-hoc CVE that is
    written to the CVE database and reported as a ``ScanInfo``.
    """

    PARSER_MATCH_FILENAMES = [
        ".env",
    ]

    @staticmethod
    def parse_file_contents(contents):
        """
        Build an ``EnvConfig`` from the text of a ``.env`` file.

        Args:
            contents (str): Full text of the file.
        Returns:
            EnvConfig: One ``EnvNamespaceConfig`` per namespace found.
        Raises:
            TypeError: If a namespace lacks a required field or sets a field
                that ``EnvNamespaceConfig`` does not define.
        """
        prefix = "CVE_BIN_TOOL_"
        namespaces = {}
        for raw_line in contents.splitlines():
            line = raw_line.strip()
            if not line.startswith(prefix):
                continue
            name, separator, value = line.partition("=")
            namespace, underscore, field = (
                name.strip()[len(prefix) :].partition("_")
            )
            # A prefixed line without "=" or without "<NAMESPACE>_<FIELD>"
            # cannot be one of our settings; ignore it like any other
            # unrelated line instead of aborting the whole scan.
            if not separator or not underscore:
                continue
            value = value.strip()
            # Drop one surrounding double quote on each side, if present.
            if value.startswith('"'):
                value = value[1:]
            if value.endswith('"'):
                value = value[:-1]
            namespaces.setdefault(namespace, {})[field.lower()] = value
        return EnvConfig(
            namespaces={
                namespace: EnvNamespaceConfig(**fields)
                for namespace, fields in namespaces.items()
            }
        )

    def run_checker(self, filename):
        """
        Parse the .env file, record its ad-hoc CVEs and yield the products.

        Args:
            filename (str): The path to the .env file.
        Yields:
            ScanInfo: One entry per namespace declared in the file.
        """
        self.filename = filename
        contents = pathlib.Path(self.filename).read_text(encoding="utf-8")

        findings = list(self.parse_file_contents(contents).namespaces.values())
        if not findings:
            # Most .env files declare nothing for us; skip the database.
            return

        data_source = "environment"
        affected_data = [
            {
                "cve_id": cve.ad_hoc_cve_id,
                "vendor": cve.vendor,
                "product": cve.product,
                # TODO Version MUST be unique to this bug!
                "version": cve.version,
                "versionStartIncluding": "",
                "versionStartExcluding": "",
                "versionEndIncluding": "",
                "versionEndExcluding": "",
            }
            for cve in findings
        ]
        severity_data = [
            {
                "ID": cve.ad_hoc_cve_id,
                # TODO severity
                "severity": "LOW",
                # TODO description
                "description": "TODO",
                # TODO score
                "score": 0,
                # TODO CVSS_version
                "CVSS_version": 3,
                # TODO CVSS_vector
                "CVSS_vector": "",
                "last_modified": "",
            }
            for cve in findings
        ]

        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)

        scanned_path = pathlib.Path(filename).resolve()
        for cve in findings:
            yield ScanInfo(
                ProductInfo(
                    cve.vendor,
                    cve.product,
                    cve.version,
                    cve.location,
                    PackageURL(
                        type="ad-hoc",
                        namespace=cve.vendor,
                        # Keep only letters, digits, ".", "_" and "-".
                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                        version=cve.version,
                        qualifiers={},
                        subpath=None,
                    ),
                ),
                scanned_path,
            )


# The definition below belongs to
# cve_bin_tool/parsers/static_analysis_bandit.py (next file in the patch).
@dataclasses.dataclass
class BanditNamespaceConfig:
    # Identifier stored as the CVE number of the ad-hoc finding.
    ad_hoc_cve_id: str
    vendor: str
    product: str
    version: str
    location: str
    description: str
    severity: str
    score: float
@dataclasses.dataclass
class BanditConfig:
    """All findings parsed from one Bandit JSON report."""

    # Maps "bandit-<index>" to the finding at that index of the report.
    namespaces: dict[str, BanditNamespaceConfig]


class BanditParser(Parser):
    """
    Parser that runs the Bandit static analyzer on Python source files.

    Every result reported by Bandit becomes an ad-hoc CVE that is written to
    the CVE database and reported as a ``ScanInfo``.
    """

    PARSER_MATCH_FILENAMES = [
        ".py",
    ]

    @staticmethod
    def parse_bandit_output(contents):
        """
        Convert Bandit's JSON report into a ``BanditConfig``.

        Args:
            contents (str | bytes): JSON document written by ``bandit -f json``.
        Returns:
            BanditConfig: One ``BanditNamespaceConfig`` per entry of the
            report's ``results`` list.
        Raises:
            RuntimeError: If the report's ``errors`` list is not empty. The
                message is the whole report re-serialized as JSON.
        """
        # TODO Take vendor product and version automatically from git repo
        # or installed pypi package meta-info.
        vendor = "TODO-myvendor"
        product = "TODO-myproduct"
        # One random version per report; all findings of the report share it.
        version = f"v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-{uuid.uuid4()}"

        report = json.loads(contents)

        if report.get("errors", []):
            raise RuntimeError(json.dumps(report))

        namespaces = {}
        for i, result in enumerate(report.get("results", [])):
            # TODO Version is the same when code at location matches code
            # from output (result["issue_text"] + result["code"]).

            # TODO Replace UUID with SCITT URN
            # SCITT A.4.2
            ad_hoc_cve_id = f"CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...{i}...qnGmr1o"

            # TODO Sort by something, line? Int of content address?
            namespaces[f"bandit-{i}"] = BanditNamespaceConfig(
                ad_hoc_cve_id=ad_hoc_cve_id,
                vendor=vendor,
                product=product,
                version=version,
                severity="LOW",
                score=0.0,
                # The field is declared as str; Bandit reports an integer.
                location=str(result["line_number"]),
                description=json.dumps(result),
            )
        return BanditConfig(namespaces=namespaces)

    def run_checker(self, filename):
        """
        Run Bandit on a Python file and yield one ScanInfo per finding.

        Args:
            filename (str): The path to the Python file to analyze.
        Yields:
            ScanInfo: One entry per result reported by Bandit.
        Raises:
            RuntimeError: If the Bandit process exits with a non-zero status
                (the message carries its stderr) or its report lists errors.
        """
        file_path = pathlib.Path(filename).resolve()
        cmd = [
            sys.executable,
            "-um",
            "bandit",
            "-f",
            "json",
            "--exit-zero",
            "--",
            # TODO Relative paths? Need top level directory being scanned
            str(file_path),
        ]
        try:
            # stderr must be captured, otherwise CalledProcessError.stderr
            # is None and the failure reason is lost.
            stdout = subprocess.check_output(cmd, stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as error:
            stderr_text = (error.stderr or b"").decode(errors="replace")
            raise RuntimeError(
                f"bandit exited with status {error.returncode}: {stderr_text}"
            ) from error

        findings = list(self.parse_bandit_output(stdout).namespaces.values())
        if not findings:
            # Nothing reported: nothing to store and nothing to yield.
            return

        # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE
        # by making a request to the policy engine and getting its workflow
        # manifest as output and deriving from that or extend it to return that.
        data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE"

        affected_data = [
            {
                "cve_id": cve.ad_hoc_cve_id,
                "vendor": cve.vendor,
                "product": cve.product,
                # TODO Version MUST be unique to this bug!
                "version": cve.version,
                "versionStartIncluding": "",
                "versionStartExcluding": "",
                "versionEndIncluding": "",
                "versionEndExcluding": "",
            }
            for cve in findings
        ]
        severity_data = [
            {
                "ID": cve.ad_hoc_cve_id,
                "severity": cve.severity,
                "description": cve.description,
                "score": cve.score,
                # TODO CVSS_version
                "CVSS_version": 3,
                # TODO CVSS_vector
                "CVSS_vector": "",
                # TODO Ideally this comes from bisecting and pinpointing the
                # bug's introduction to the codebase
                "last_modified": "",
            }
            for cve in findings
        ]

        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)

        for cve in findings:
            # Built per finding so that each ScanInfo carries its own
            # location rather than the one of the last finding.
            product_info = ProductInfo(
                cve.vendor,
                cve.product,
                cve.version,
                cve.location,
                PackageURL(
                    type="ad-hoc",
                    namespace=cve.vendor,
                    # Keep only letters, digits, ".", "_" and "-".
                    name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                    version=cve.version,
                    qualifiers={},
                    subpath=None,
                ),
            )
            yield ScanInfo(product_info, file_path)

        # TODO VEX attached via linked data to ad-hoc CVE-ID
This class should be located in the appropriate directory within your project. For example, you might place it in ``cve_bin_tool_parser_env/env.py``. + +.. literalinclude:: /../cve_bin_tool/parsers/env.py + +2. Set Up ``setup.py`` +^^^^^^^^^^^^^^^^^^^^^^ + +Next, configure the ``setup.py`` file boilerplate. + +.. code-block:: python + + # setup.py + + import sys + import site + import setuptools + + # See https://github.com/pypa/pip/issues/7953 + site.ENABLE_USER_SITE = "--user" in sys.argv[1:] + + setuptools.setup(use_scm_version=True) + +3. Set Up ``setup.cfg`` +^^^^^^^^^^^^^^^^^^^^^^^ + +Next, configure the ``setup.cfg`` file to include your new parser as an entry point. This allows the parser to be dynamically discovered and used by the project. + +.. code-block:: ini + + # setup.cfg + + [metadata] + name = cve-bin-tool-parser-env + version = 1.0.0 + description = CVE Binary Tool Parser Plugin: .env + + [options] + packages = find: + entry_points = file: entry_points.txt + setup_requires = + setuptools_scm[toml]>=3.4.3 + +4. Create ``entry_points.txt`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Create the ``entry_points.txt`` file that ``setup.cfg`` refers to. It registers the parser class under the ``cve_bin_tool.parsers`` entry point group. + +.. code-block:: ini + + # entry_points.txt + + [cve_bin_tool.parsers] + env = cve_bin_tool_parser_env.env:EnvParser + +5. Install your plugin +^^^^^^^^^^^^^^^^^^^^^^ + +If you set up a virtualenv, activate it before installing. + +.. code-block:: console + + $ python -m pip install -e . + +6. Populate the to-be-parsed file +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this example we implemented the ``EnvParser``, which reads files in the +``/etc/environment`` format. + +.. literalinclude:: /../test/parser_env_test_0001.env + +7. Run ``cve-bin-tool`` and see your plugin's findings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Let's test that our defined CVE comes up by scanning a ``.env`` file. + +.. 
code-block:: console + + $ cve-bin-tool --log debug .env + +Advanced Example: Ad-Hoc CVEs +----------------------------- + +For more information see: https://github.com/ossf/wg-vulnerability-disclosures/issues/94 + +1. Create the Parser Class +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in ``cve_bin_tool_parser_static_analysis_bandit/bandit.py``. + +.. literalinclude:: /../cve_bin_tool/parsers/static_analysis_bandit.py + +2. Run ``cve-bin-tool`` and see your plugin's findings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this example we implemented the ``BanditParser``, which runs the Bandit +static analysis tool on Python files. We'll test that it loads by scanning +a ``.py`` file. + +.. code-block:: console + + $ cve-bin-tool --log debug setup.py + +Test Implementation +------------------- + +The test class ``TestParsers`` verifies that the expected file types are correctly mapped to their respective parsers. The test ensures that the actual valid files match the expected valid files. + +Test Method +^^^^^^^^^^^ + +- ``test_parser_match_filenames_results_in_correct_valid_files``: This test compares the ``EXPECTED_VALID_FILES`` dictionary with the ``actual_valid_files`` dictionary imported from ``cve_bin_tool.parsers.parse``. If there is any discrepancy between the two, the test will fail, indicating that the loaded file types do not match the expected registered file types. 
diff --git a/doc/index.rst b/doc/index.rst index a0de036e1a..798c77d831 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -19,6 +19,7 @@ The CVE Binary Tool helps you determine if your system includes known vulnerabil RELEASE.md CONTRIBUTING.md CHECKERS.md + PARSERS.rst sboms_for_humans/README.md new-contributor-tips.md pypi_downloads.md diff --git a/test/parser_env_test_0001.env b/test/parser_env_test_0001.env new file mode 100644 index 0000000000..5935efa108 --- /dev/null +++ b/test/parser_env_test_0001.env @@ -0,0 +1,4 @@ +CVE_BIN_TOOL_0_PRODUCT="myproduct" +CVE_BIN_TOOL_0_VENDOR="myvendor" +CVE_BIN_TOOL_0_VERSION="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000" +CVE_BIN_TOOL_0_AD_HOC_CVE_ID="CVE-0001-15004435-aa84-43ff-9c26-f703a26069f8" diff --git a/test/test_parsers.py b/test/test_parsers.py index 09e6e88c98..137cb003ee 100644 --- a/test/test_parsers.py +++ b/test/test_parsers.py @@ -1,11 +1,21 @@ -import pytest +import atexit +import contextlib +import pathlib +import re +import tempfile import unittest -from cve_bin_tool.parsers.parse import valid_files as actual_valid_files +import pytest +from packageurl import PackageURL + +from cve_bin_tool.cvedb import CVEDB +from cve_bin_tool.log import LOGGER from cve_bin_tool.parsers.dart import DartParser +from cve_bin_tool.parsers.env import EnvParser from cve_bin_tool.parsers.go import GoParser from cve_bin_tool.parsers.java import JavaParser from cve_bin_tool.parsers.javascript import JavascriptParser +from cve_bin_tool.parsers.parse import valid_files as actual_valid_files from cve_bin_tool.parsers.perl import PerlParser from cve_bin_tool.parsers.php import PhpParser from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser @@ -13,7 +23,16 @@ from cve_bin_tool.parsers.ruby import RubyParser from cve_bin_tool.parsers.rust import RustParser from cve_bin_tool.parsers.swift import SwiftParser +from cve_bin_tool.util import ProductInfo, ScanInfo + +cve_db = CVEDB() +logger = 
@pytest.mark.asyncio
async def test_parser_env_test_0001(self):
    """EnvParser reports the single product declared in the sample .env file."""
    # run_checker() yields a resolved path, so resolve the expected path as
    # well; otherwise the comparison fails when the temporary directory is
    # reached through a symlink.
    file_path = pathlib.Path(tmpdir, ".env").resolve()
    file_path.write_text(PARSER_ENV_TEST_0001_ENV_CONTENTS)
    env_parser = EnvParser(cve_db, logger)
    results = list(env_parser.run_checker(file_path))

    version = "v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000"
    expected = [
        ScanInfo(
            product_info=ProductInfo(
                vendor="myvendor",
                product="myproduct",
                version=version,
                # TODO location?
                location="/usr/local/bin/product",
                # TODO purl
                purl=PackageURL(
                    type="ad-hoc",
                    namespace="myvendor",
                    name="myproduct",
                    version=version,
                    qualifiers={},
                    subpath=None,
                ),
            ),
            file_path=file_path,
        )
    ]

    # maxDiff must be set on the TestCase instance doing the comparison for
    # the full diff to be shown on failure.
    test_case = unittest.TestCase()
    test_case.maxDiff = None
    test_case.assertListEqual(results, expected)