From 1197881a297b275ccea1165836a20443e26ad27f Mon Sep 17 00:00:00 2001
From: John Andersen
Date: Mon, 17 Jun 2024 15:50:57 +0000
Subject: [PATCH] feat: documentation on out of tree parsers

Signed-off-by: John

feat: bandit adhoc cves

nodemon -e py --exec "clear; cve-bin-tool -u never --log debug --format json --detailed -- cve_bin_tool/parsers/static_analysis_bandit.py; sh -c 'cat $(ls output.* | tail -n 1) | jq'; test 1"

Signed-off-by: John Andersen
---
 cve_bin_tool/cvedb.py                         |  10 +
 cve_bin_tool/parsers/__init__.py              |   1 +
 cve_bin_tool/parsers/env.py                   | 163 ++++++++++++++
 .../parsers/static_analysis_bandit.py         | 200 ++++++++++++++++++
 doc/PARSERS.rst                               | 171 +++++++++++++++
 doc/index.rst                                 |   1 +
 test/parser_env_test_0001.env                 |   4 +
 test/test_parsers.py                          |  63 +++++-
 8 files changed, 612 insertions(+), 1 deletion(-)
 create mode 100644 cve_bin_tool/parsers/env.py
 create mode 100644 cve_bin_tool/parsers/static_analysis_bandit.py
 create mode 100644 doc/PARSERS.rst
 create mode 100644 test/parser_env_test_0001.env

diff --git a/cve_bin_tool/cvedb.py b/cve_bin_tool/cvedb.py
index 1451eaa996..99a29e4cca 100644
--- a/cve_bin_tool/cvedb.py
+++ b/cve_bin_tool/cvedb.py
@@ -8,6 +8,7 @@
 
 import asyncio
+import contextlib
 import datetime
 import json
 import logging
 import shutil
@@ -1193,3 +1194,12 @@ def fetch_from_mirror(self, mirror, pubkey, ignore_signature, log_signature_erro
         else:
             self.clear_cached_data()
             return -1
+
+    @contextlib.contextmanager
+    def with_cursor(self):
+        """Yield a cursor from db_open_and_get_cursor(); always call db_close() on exit."""
+        cursor = self.db_open_and_get_cursor()
+        try:
+            yield cursor
+        finally:
+            self.db_close()
diff --git a/cve_bin_tool/parsers/__init__.py b/cve_bin_tool/parsers/__init__.py
index 1e39cc33f5..7657cdfb2d 100644
--- a/cve_bin_tool/parsers/__init__.py
+++ b/cve_bin_tool/parsers/__init__.py
@@ -25,6 +25,7 @@
     "php",
     "perl",
     "dart",
+    "env",
 ]
 
 
diff --git a/cve_bin_tool/parsers/env.py b/cve_bin_tool/parsers/env.py
new file mode 100644
index 0000000000..d61f906b3b
--- /dev/null
+++ b/cve_bin_tool/parsers/env.py
@@ -0,0 +1,163 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import json
+import re
+import pathlib
+import subprocess
+import contextlib
+import dataclasses
+from re import MULTILINE, compile, search
+
+from packageurl import PackageURL
+from packaging.version import parse as parse_version
+
+from cve_bin_tool.parsers import Parser
+from cve_bin_tool.strings import parse_strings
+from cve_bin_tool.util import ProductInfo, ScanInfo
+
+
+@dataclasses.dataclass
+class EnvNamespaceConfig:
+    ad_hoc_cve_id: str
+    vendor: str
+    product: str
+    version: str
+    location: str = "/usr/local/bin/product"
+
+
+@dataclasses.dataclass
+class EnvConfig:
+    namespaces: dict[str, EnvNamespaceConfig]
+
+
+class EnvParser(Parser):
+    """
+    Parser for ``.env`` files that declare ad-hoc CVEs.
+    Lines of the form ``CVE_BIN_TOOL_<NAMESPACE>_<FIELD>=<value>`` are grouped
+    by namespace; each namespace describes one product and its ad-hoc CVE.
+    """
+
+    PARSER_MATCH_FILENAMES = [
+        ".env",
+    ]
+
+    @staticmethod
+    def parse_file_contents(contents):
+        """
+        Build an EnvConfig from the text of a .env file.
+        Lines that do not start with ``CVE_BIN_TOOL_``, have no ``=`` or have
+        no ``<NAMESPACE>_<FIELD>`` key are skipped instead of raising.
+        Args:
+            contents (str): The text of the .env file.
+        Returns:
+            EnvConfig: One EnvNamespaceConfig per namespace found.
+        Raises:
+            TypeError: If a namespace has missing or unknown fields.
+        """
+        prefix = "CVE_BIN_TOOL_"
+        fields_by_namespace = {}
+        for line in contents.replace("\r\n", "\n").split("\n"):
+            if not line.startswith(prefix) or "=" not in line:
+                continue
+            key, value = line.split("=", maxsplit=1)
+            namespace, separator, field = key[len(prefix) :].partition("_")
+            if not separator:
+                continue
+            # Strip a leading and/or trailing double quote from the value.
+            if value.startswith('"'):
+                value = value[1:]
+            if value.endswith('"'):
+                value = value[:-1]
+            fields_by_namespace.setdefault(namespace, {})[field.lower()] = value
+        return EnvConfig(
+            namespaces={
+                namespace: EnvNamespaceConfig(**fields)
+                for namespace, fields in fields_by_namespace.items()
+            }
+        )
+
+    def run_checker(self, filename):
+        """
+        Parse the .env file, store its ad-hoc CVEs and yield its products.
+        Args:
+            filename (str): The path to the .env file.
+        Yields:
+            ScanInfo: One entry per namespace declared in the file.
+        """
+        self.filename = filename
+        contents = pathlib.Path(self.filename).read_text()
+
+        env_config = self.parse_file_contents(contents)
+
+        # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE
+        # by making a request to the policy engine and getting its workflow
+        # manifest as output and deriving from that or extend it to return that.
+        data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE"
+        affected_data = [
+            {
+                "cve_id": cve.ad_hoc_cve_id,
+                "vendor": cve.vendor,
+                "product": cve.product,
+                # TODO Version MUST be unique to this bug!
+                "version": cve.version,
+                "versionStartIncluding": "",
+                # "versionStartIncluding": cve.version,
+                "versionStartExcluding": "",
+                "versionEndIncluding": "",
+                # "versionEndIncluding": cve.version,
+                "versionEndExcluding": "",
+            }
+            for cve in env_config.namespaces.values()
+        ]
+        severity_data = [
+            {
+                "ID": cve.ad_hoc_cve_id,
+                # TODO severity
+                "severity": "LOW",
+                # TODO description
+                "description": "TODO",
+                # TODO score
+                "score": 0,
+                # TODO CVSS_version
+                "CVSS_version": 3,
+                # TODO CVSS_vector
+                "CVSS_vector": "",
+                # TODO Ideally this comes from bisecting and pinpointing the
+                # bug's introduction to the codebase
+                "last_modified": "",
+            }
+            for cve in env_config.namespaces.values()
+        ]
+
+        with self.cve_db.with_cursor() as cursor:
+            for cve in env_config.namespaces.values():
+                # Parameterized: the product name comes from the scanned file.
+                cursor.execute(
+                    "DELETE from cve_range where product=?", (cve.product,)
+                )
+        with self.cve_db.with_cursor() as cursor:
+            self.cve_db.populate_cve_metrics(severity_data, cursor)
+            self.cve_db.populate_severity(severity_data, cursor, data_source)
+            self.cve_db.populate_affected(affected_data, cursor, data_source)
+
+        for cve in env_config.namespaces.values():
+            yield ScanInfo(
+                ProductInfo(
+                    cve.vendor,
+                    cve.product,
+                    cve.version,
+                    cve.location,
+                    PackageURL(
+                        type="ad-hoc",
+                        namespace=cve.vendor,
+                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
+                        version=cve.version,
+                        qualifiers={},
+                        subpath=None,
+                    ),
+                ),
+                pathlib.Path(filename).resolve(),
+            )
+
+        # TODO VEX attached via linked data to ad-hoc CVE-ID
diff --git a/cve_bin_tool/parsers/static_analysis_bandit.py b/cve_bin_tool/parsers/static_analysis_bandit.py
new file mode 100644
index 0000000000..d6991bc13f
--- /dev/null
+++ b/cve_bin_tool/parsers/static_analysis_bandit.py
@@ -0,0 +1,200 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import sys
+import json
+import uuid
+import re
+import pathlib
+import subprocess
+import contextlib
+import dataclasses
+from re import MULTILINE, compile, search
+
+from packageurl import PackageURL
+from packaging.version import parse as parse_version
+
+from cve_bin_tool.parsers import Parser
+from cve_bin_tool.strings import parse_strings
+from cve_bin_tool.util import ProductInfo, ScanInfo
+
+
+@dataclasses.dataclass
+class BanditNamespaceConfig:
+    ad_hoc_cve_id: str
+    vendor: str
+    product: str
+    version: str
+    location: str
+    description: str
+    severity: str
+    score: float
+
+
+@dataclasses.dataclass
+class BanditConfig:
+    namespaces: dict[str, BanditNamespaceConfig]
+
+
+class BanditParser(Parser):
+    """
+    Parser that runs Bandit on Python files and reports ad-hoc CVEs.
+    Each Bandit finding is stored in the CVE database as an ad-hoc CVE and
+    reported as a product with a PURL.
+    """
+
+    PARSER_MATCH_FILENAMES = [
+        ".py",
+    ]
+
+    @staticmethod
+    def parse_bandit_output(contents):
+        """
+        Build a BanditConfig from Bandit's JSON report.
+        Args:
+            contents (str | bytes): JSON emitted by ``bandit -f json``.
+        Returns:
+            BanditConfig: One BanditNamespaceConfig per reported result.
+        Raises:
+            Exception: If the report contains a non-empty ``errors`` list.
+        """
+        vendor = "TODO-myvendor"
+        product = "TODO-myproduct"
+        version = f"v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-{uuid.uuid4()}"
+
+        contents = json.loads(contents)
+
+        errors = contents.get("errors", [])
+        if errors:
+            raise Exception(json.dumps(contents))
+
+        namespaces = {}
+        for i, result in enumerate(contents.get("results", [])):
+            # TODO Version is the same when code at location matches code from
+            # output: derive it from result["issue_text"] + result["code"]
+
+            # TODO Replace UUID with SCITT URN
+            # SCITT A.4.2
+            ad_hoc_cve_id = f"CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...{i}...qnGmr1o"
+
+            # TODO Sort by something, line? Int of content address?
+            namespace = f"bandit-{i}"
+
+            # TODO Take vendor product and version automatically from git repo
+            # or installed pypi package meta-info.
+            namespaces[namespace] = BanditNamespaceConfig(
+                ad_hoc_cve_id=ad_hoc_cve_id,
+                vendor=vendor,
+                product=product,
+                version=version,
+                severity="LOW",
+                score=0.0,
+                # The location field is a str; Bandit reports an int line number
+                location=str(result["line_number"]),
+                description=json.dumps(result),
+            )
+        return BanditConfig(namespaces=namespaces)
+
+    def run_checker(self, filename):
+        """
+        Run Bandit on a Python file, store its findings and yield them.
+        Args:
+            filename (str): The path to the Python file.
+        Yields:
+            ScanInfo: One entry per Bandit finding in the file.
+        Raises:
+            Exception: If Bandit exits non-zero or its report lists errors.
+        """
+        file_path = pathlib.Path(filename).resolve()
+        cmd = [
+            sys.executable,
+            "-um",
+            "bandit",
+            "-f",
+            "json",
+            "--exit-zero",
+            "--",
+            # TODO Relative paths? Need top level directory being scanned
+            str(file_path),
+        ]
+        try:
+            stdout = subprocess.check_output(
+                cmd,
+                # Captured so that error.stderr is populated on failure
+                stderr=subprocess.PIPE,
+            )
+        except subprocess.CalledProcessError as error:
+            raise Exception(error.stderr) from error
+
+        bandit_config = self.parse_bandit_output(stdout)
+
+        # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE
+        # by making a request to the policy engine and getting its workflow
+        # manifest as output and deriving from that or extend it to return that.
+        data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE"
+
+        affected_data = []
+        severity_data = []
+
+        for cve in bandit_config.namespaces.values():
+            affected_data.append(
+                {
+                    "cve_id": cve.ad_hoc_cve_id,
+                    "vendor": cve.vendor,
+                    "product": cve.product,
+                    # TODO Version MUST be unique to this bug!
+                    "version": cve.version,
+                    "versionStartIncluding": "",
+                    # "versionStartIncluding": cve.version,
+                    "versionStartExcluding": "",
+                    "versionEndIncluding": "",
+                    # "versionEndIncluding": cve.version,
+                    "versionEndExcluding": "",
+                }
+            )
+            severity_data.append(
+                {
+                    "ID": cve.ad_hoc_cve_id,
+                    # TODO severity
+                    "severity": cve.severity,
+                    # TODO description
+                    "description": cve.description,
+                    # TODO score
+                    "score": 0,
+                    # TODO CVSS_version
+                    "CVSS_version": 3,
+                    # TODO CVSS_vector
+                    "CVSS_vector": "",
+                    # TODO Ideally this comes from bisecting and pinpointing the
+                    # bug's introduction to the codebase
+                    "last_modified": "",
+                }
+            )
+
+        with self.cve_db.with_cursor() as cursor:
+            self.cve_db.populate_cve_metrics(severity_data, cursor)
+            self.cve_db.populate_severity(severity_data, cursor, data_source)
+            self.cve_db.populate_affected(affected_data, cursor, data_source)
+
+        # Build the product info per finding; relying on the loop variable
+        # after the loop fails when Bandit reports no results.
+        for cve in bandit_config.namespaces.values():
+            yield ScanInfo(
+                ProductInfo(
+                    cve.vendor,
+                    cve.product,
+                    cve.version,
+                    cve.location,
+                    PackageURL(
+                        type="ad-hoc",
+                        namespace=cve.vendor,
+                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
+                        version=cve.version,
+                        qualifiers={},
+                        subpath=None,
+                    ),
+                ),
+                file_path,
+            )
+
+        # TODO VEX attached via linked data to ad-hoc CVE-ID
diff --git a/doc/PARSERS.rst b/doc/PARSERS.rst
new file mode 100644
index 0000000000..2438e365eb
--- /dev/null
+++ b/doc/PARSERS.rst
@@ -0,0 +1,171 @@
+Adding a new parser to cve-bin-tool
+===================================
+
+Overview
+--------
+
+This document details the functionality added by the latest patch to the parsers in the project. The patch introduces and tests various parsers for different programming languages and ensures they correctly identify and handle specific file types.
+
+The addition of these parsers enhances the capability of the project to handle a variety of file types associated with different programming languages. The comprehensive test ensures that these parsers are correctly mapped and function as expected.
+
+Parsers
+-------
+
+The following parsers have been added to the project:
+
+- **DartParser**
+- **GoParser**
+- **JavaParser**
+- **JavascriptParser**
+- **PerlParser**
+- **PhpParser**
+- **PythonParser**
+- **PythonRequirementsParser**
+- **RParser**
+- **RubyParser**
+- **RustParser**
+- **SwiftParser**
+- **BanditParser**
+- **EnvParser**
+
+Usage
+-----
+
+To utilize these parsers, ensure that your project includes the following imports:
+
+.. code-block:: python
+
+    from cve_bin_tool.parsers.dart import DartParser
+    from cve_bin_tool.parsers.go import GoParser
+    from cve_bin_tool.parsers.java import JavaParser
+    from cve_bin_tool.parsers.javascript import JavascriptParser
+    from cve_bin_tool.parsers.perl import PerlParser
+    from cve_bin_tool.parsers.php import PhpParser
+    from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser
+    from cve_bin_tool.parsers.r import RParser
+    from cve_bin_tool.parsers.ruby import RubyParser
+    from cve_bin_tool.parsers.rust import RustParser
+    from cve_bin_tool.parsers.swift import SwiftParser
+    from cve_bin_tool.parsers.static_analysis_bandit import BanditParser
+    from cve_bin_tool.parsers.env import EnvParser
+
+Setting Up a New Package and Entry Point
+----------------------------------------
+
+To implement a new parser plugin, such as a Bandit parser, follow these steps:
+
+1. Create the Parser Class
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in ``cve_bin_tool_parser_env/env.py``.
+
+.. literalinclude:: /../cve_bin_tool/parsers/env.py
+
+2. Set Up ``setup.py``
+^^^^^^^^^^^^^^^^^^^^^^
+
+Next, configure the ``setup.py`` file boilerplate.
+
+.. code-block:: python
+
+    # setup.py
+
+    import sys
+    import site
+    import setuptools
+
+    # See https://github.com/pypa/pip/issues/7953
+    site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
+
+    setuptools.setup(use_scm_version=True)
+
+3. Set Up ``setup.cfg``
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Next, configure the ``setup.cfg`` file to include your new parser as an entry point. This allows the parser to be dynamically discovered and used by the project.
+
+.. code-block:: ini
+
+    # setup.cfg
+
+    [metadata]
+    name = cve-bin-tool-parser-env
+    version = 1.0.0
+    description = CVE Binary Tool Parser Plugin: .env
+
+    [options]
+    packages = find:
+    entry_points = file: entry_points.txt
+    setup_requires =
+        setuptools_scm[toml]>=3.4.3
+
+4. Create ``entry_points.txt``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You may also need to configure an ``entry_points.txt`` file if your project uses it to manage entry points.
+
+.. code-block:: ini
+
+    # entry_points.txt
+
+    [cve_bin_tool.parsers]
+    env = cve_bin_tool_parser_env.env:EnvParser
+
+5. Install your plugin
+^^^^^^^^^^^^^^^^^^^^^^
+
+You need to activate your virtualenv before installing if you set one up.
+
+.. code-block:: console
+
+    $ python -m pip install -e .
+
+6. Populate the to-be-parsed file
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In this example we implemented the ``EnvParser``, which parses files in the
+format of ``/etc/environment``.
+
+.. literalinclude:: /../test/parser_env_test_0001.env
+
+7. Run ``cve-bin-tool`` and see your plugin's findings
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Let's test that our defined CVE comes up by scanning a ``.env`` file.
+
+.. code-block:: console
+
+    $ cve-bin-tool --log debug .env
+
+Advanced Example: Ad-Hoc CVEs
+-----------------------------
+
+For more information see: https://github.com/ossf/wg-vulnerability-disclosures/issues/94
+
+1. Create the Parser Class
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in ``cve_bin_tool_parser_static_analysis_bandit/bandit.py``.
+
+.. literalinclude:: /../cve_bin_tool/parsers/static_analysis_bandit.py
+
+2. Run ``cve-bin-tool`` and see your plugin's findings
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In this example we implemented the ``BanditParser``, which runs Bandit, a
+static analysis tool for Python files. We'll test that it loads by scanning
+a ``.py`` file.
+
+.. code-block:: console
+
+    $ cve-bin-tool --log debug setup.py
+
+Test Implementation
+-------------------
+
+A new test class ``TestParsers`` has been introduced to verify that the expected file types are correctly mapped to their respective parsers. The test ensures that the actual valid files match the expected valid files.
+
+Test Method
+^^^^^^^^^^^
+
+- ``test_parser_match_filenames_results_in_correct_valid_files``: This test compares the ``EXPECTED_VALID_FILES`` dictionary with the ``actual_valid_files`` dictionary imported from ``cve_bin_tool.parsers.parse``. If there is any discrepancy between the two, the test will fail, indicating that the loaded file types do not match the expected registered file types.
diff --git a/doc/index.rst b/doc/index.rst
index a0de036e1a..798c77d831 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -19,6 +19,7 @@ The CVE Binary Tool helps you determine if your system includes known vulnerabil
    RELEASE.md
    CONTRIBUTING.md
    CHECKERS.md
+   PARSERS.rst
    sboms_for_humans/README.md
    new-contributor-tips.md
    pypi_downloads.md
diff --git a/test/parser_env_test_0001.env b/test/parser_env_test_0001.env
new file mode 100644
index 0000000000..5935efa108
--- /dev/null
+++ b/test/parser_env_test_0001.env
@@ -0,0 +1,4 @@
+CVE_BIN_TOOL_0_PRODUCT="myproduct"
+CVE_BIN_TOOL_0_VENDOR="myvendor"
+CVE_BIN_TOOL_0_VERSION="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000"
+CVE_BIN_TOOL_0_AD_HOC_CVE_ID="CVE-0001-15004435-aa84-43ff-9c26-f703a26069f8"
diff --git a/test/test_parsers.py b/test/test_parsers.py
index 09e6e88c98..bdb7460b4e 100644
--- a/test/test_parsers.py
+++ b/test/test_parsers.py
@@ -1,6 +1,20 @@
-import pytest
+import os
+import re
+import sys
+import shutil
+import atexit
+import pathlib
+import textwrap
+import tempfile
 import unittest
+import contextlib
+
+import pytest
+from packageurl import PackageURL
 
+from cve_bin_tool.cvedb import CVEDB
+from cve_bin_tool.util import ProductInfo, ScanInfo
+from cve_bin_tool.log import LOGGER
 from cve_bin_tool.parsers.parse import valid_files as actual_valid_files
 from cve_bin_tool.parsers.dart import DartParser
 from cve_bin_tool.parsers.go import GoParser
@@ -13,7 +27,16 @@
 from cve_bin_tool.parsers.ruby import RubyParser
 from cve_bin_tool.parsers.rust import RustParser
 from cve_bin_tool.parsers.swift import SwiftParser
+from cve_bin_tool.parsers.env import EnvParser
 
+cve_db = CVEDB()
+logger = LOGGER.getChild(__name__)
+
+stack = contextlib.ExitStack().__enter__()
+tmpdir = stack.enter_context(
+    tempfile.TemporaryDirectory(prefix="cve-bin-tool-TEST_ENV")
+)
+atexit.register(lambda: stack.__exit__(None, None, None))
 
 EXPECTED_VALID_FILES = {
     "pom.xml": [JavaParser],
@@ -29,10 +52,17 @@
     "composer.lock": [PhpParser],
     "cpanfile": [PerlParser],
     "pubspec.lock": [DartParser],
+    ".env": [EnvParser],
 }
 
+PARSER_ENV_TEST_0001_ENV_CONTENTS = (
+    pathlib.Path(__file__).parent.joinpath("parser_env_test_0001.env").read_text()
+)
+
 
 class TestParsers:
+    maxDiff = None
+
     @pytest.mark.asyncio
     async def test_parser_match_filenames_results_in_correct_valid_files(self):
         unittest.TestCase().assertDictEqual(
@@ -40,3 +70,34 @@ async def test_parser_match_filenames_results_in_correct_valid_files(self):
             actual_valid_files,
             "Expected registered file types not the same as loaded file types, second dict is actual file types loaded, first is expected",
         )
+
+    @pytest.mark.asyncio
+    async def test_parser_env_test_0001(self):
+        file_path = pathlib.Path(tmpdir, ".env")
+        file_path.write_text(PARSER_ENV_TEST_0001_ENV_CONTENTS)
+        env_parser = EnvParser(cve_db, logger)
+        results = list(env_parser.run_checker(file_path))
+        unittest.TestCase().assertListEqual(
+            results,
+            [
+                ScanInfo(
+                    product_info=ProductInfo(
+                        vendor="myvendor",
+                        product="myproduct",
+                        version="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000",
+                        # TODO location?
+                        location="/usr/local/bin/product",
+                        # TODO purl
+                        purl=PackageURL(
+                            type="ad-hoc",
+                            namespace="myvendor",
+                            name=re.sub(r"[^a-zA-Z0-9._-]", "", "myproduct").lower(),
+                            version="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000",
+                            qualifiers={},
+                            subpath=None,
+                        ),
+                    ),
+                    file_path=file_path.resolve(),
+                )
+            ],
+        )