diff --git a/cve_bin_tool/parsers/env.py b/cve_bin_tool/parsers/env.py
new file mode 100644
index 0000000000..ec6b2b1716
--- /dev/null
+++ b/cve_bin_tool/parsers/env.py
@@ -0,0 +1,193 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import json
+import re
+import subprocess
+from re import MULTILINE, compile, search
+
+from packaging.version import parse as parse_version
+
+from cve_bin_tool.parsers import Parser
+from cve_bin_tool.strings import parse_strings
+from cve_bin_tool.util import ProductInfo, ScanInfo
+
+# Header fields read from PKG-INFO / METADATA files, compiled once at import.
+_NAME_FIELD = compile(r"^Name: (.+)$", MULTILINE)
+_VERSION_FIELD = compile(r"^Version: (.+)$", MULTILINE)
+
+
+def _normalize_product(product):
+    """Return product lowercased, with characters other than a-zA-Z0-9._- removed."""
+    return re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
+
+
+class PythonRequirementsParser(Parser):
+    """
+    Parser for Python requirements files.
+    This parser is designed to parse Python requirements files (usually named
+    requirements.txt) and generate PURLs (Package URLs) for the listed packages.
+    """
+
+    PARSER_MATCH_FILENAMES = [
+        "requirements.txt",
+    ]
+
+    def __init__(self, cve_db, logger):
+        """Initialize the python requirements file parser."""
+        super().__init__(cve_db, logger)
+        self.purl_pkg_type = "pypi"
+
+    def generate_purl(self, product, vendor, qualifier=None, subpath=None):
+        """
+        Generate a PURL after normalizing the product name.
+        Args:
+            product (str): Package name; it is lowercased and stripped of
+                characters other than a-zA-Z0-9._- before use.
+            vendor (str): Vendor, forwarded unchanged to the base class.
+            qualifier (dict): Optional qualifiers; an empty dict when omitted.
+            subpath (str): Optional subpath, forwarded unchanged.
+        Returns:
+            The base class result, or None when the normalized name is empty.
+        """
+        product = _normalize_product(product)
+        if not product:
+            return None
+
+        # Build the dict per call: a `{}` default would be shared across calls.
+        qualifier = {} if qualifier is None else qualifier
+        return super().generate_purl(product, vendor, qualifier, subpath)
+
+    def run_checker(self, filename):
+        """
+        Resolve the requirements file with pip and yield the vendor lookups.
+        Args:
+            filename (str): The path to the requirements file.
+        Yields:
+            Items returned by find_vendor_from_purl, or by find_vendor when the
+            PURL lookup reports no result, for each package in pip's report.
+        """
+        self.filename = filename
+        try:
+            # stdout and stderr are captured separately, so a single pip run
+            # provides both the JSON report and the diagnostics on failure.
+            completed = subprocess.run(
+                [
+                    "pip3",
+                    "install",
+                    "-r",
+                    self.filename,
+                    "--dry-run",
+                    "--ignore-installed",
+                    "--report",
+                    "-",
+                    "--quiet",
+                ],
+                capture_output=True,
+                check=True,
+            )
+        except subprocess.CalledProcessError as e:
+            self.logger.error(e.stderr or e.output)
+            pip_version = subprocess.check_output(["pip3", "--version"], text=True)
+            # Output will look like:
+            # pip 20.0.2 from /usr/lib/python3/dist-packages/pip (python 3.8)
+            pip_version = pip_version.split(" ")[1]
+            if parse_version(pip_version) < parse_version("22.2"):
+                self.logger.error(
+                    f"{filename} not scanned: pip --dry-run was unable to get package versions."
+                )
+                self.logger.error(
+                    "pip version >= 22.2 is required to scan Python requirements files."
+                )
+            # No report was produced, so there is nothing to parse.
+            return
+
+        report = json.loads(completed.stdout)
+        for package in report["install"]:
+            product = package["metadata"]["name"]
+            version = package["metadata"]["version"]
+            purl = self.generate_purl(product, "")
+            vendor, result = self.find_vendor_from_purl(purl, version)
+
+            if not result:
+                vendor = self.find_vendor(product, version)
+
+            if vendor is not None:
+                yield from vendor
+        self.logger.debug(f"Done scanning file: {self.filename}")
+
+
+class PythonParser(Parser):
+    """
+    Parser for Python package metadata files.
+    This parser is designed to parse Python package metadata files (usually named
+    PKG-INFO or METADATA) and generate PURLs (Package URLs) for the package.
+    """
+
+    PARSER_MATCH_FILENAMES = [
+        "PKG-INFO: ",
+        "METADATA: ",
+    ]
+
+    def __init__(self, cve_db, logger):
+        """Initialize the python package metadata parser."""
+        super().__init__(cve_db, logger)
+        self.purl_pkg_type = "pypi"
+
+    def generate_purl(self, product, vendor, qualifier=None, subpath=None):
+        """
+        Generate a PURL after normalizing the product name.
+        Args:
+            product (str): Package name; it is lowercased and stripped of
+                characters other than a-zA-Z0-9._- before use.
+            vendor (str): Vendor, forwarded unchanged to the base class.
+            qualifier (dict): Optional qualifiers; an empty dict when omitted.
+            subpath (str): Optional subpath, forwarded unchanged.
+        Returns:
+            The base class result, or None when the normalized name is empty.
+        """
+        product = _normalize_product(product)
+        if not product:
+            return None
+
+        # Build the dict per call: a `{}` default would be shared across calls.
+        qualifier = {} if qualifier is None else qualifier
+        return super().generate_purl(product, vendor, qualifier, subpath)
+
+    def run_checker(self, filename):
+        """
+        This generator runs only for python packages.
+        There are no actual checkers.
+        The ProductInfo is computed without the help of any checkers from PKG-INFO or METADATA.
+        """
+        self.filename = filename
+        lines = parse_strings(self.filename)
+        # Only the first three lines are searched for the Name/Version fields.
+        header = "\n".join(lines.splitlines()[:3])
+        name = search(_NAME_FIELD, header)
+        version = search(_VERSION_FIELD, header)
+
+        if name is None or version is None:
+            # There are packages with a METADATA file in them containing different data from what the tool expects
+            self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")
+        else:
+            product, version = name.group(1), version.group(1)
+            purl = self.generate_purl(product, "")
+            vendor, result = self.find_vendor_from_purl(purl, version)
+
+            if vendor is not None:
+                yield from vendor
+
+            if not result:
+                # Fall back to the vendor/product pairs the CVE database returns.
+                for pair in self.cve_db.get_vendor_product_pairs(product):
+                    vendor = pair["vendor"]
+                    location = pair.get("location", "/usr/local/bin/product")
+                    self.logger.debug(
+                        f"{self.filename} is {vendor}.{product} {version}"
+                    )
+                    yield ScanInfo(
+                        ProductInfo(vendor, product, version, location),
+                        self.filename,
+                    )
+        self.logger.debug(f"Done scanning file: {filename}")
diff --git a/test/test_parsers.py b/test/test_parsers.py
index 09e6e88c98..c01119d810 100644
--- a/test/test_parsers.py
+++ b/test/test_parsers.py
@@ -1,6 +1,14 @@
-import pytest
+import atexit
+import os
+import shutil
+import sys
+import tempfile
 import unittest
 
+import pytest
+
+from cve_bin_tool.cvedb import CVEDB
+from cve_bin_tool.log import LOGGER
 from cve_bin_tool.parsers.parse import valid_files as actual_valid_files
 from cve_bin_tool.parsers.dart import DartParser
 from cve_bin_tool.parsers.go import GoParser
@@ -13,7 +21,13 @@
 from cve_bin_tool.parsers.ruby import RubyParser
 from cve_bin_tool.parsers.rust import RustParser
 from cve_bin_tool.parsers.swift import SwiftParser
+from cve_bin_tool.parsers.env import EnvParser  # NOTE(review): env.py as added in this diff defines no EnvParser - confirm
+
+cve_db = CVEDB()
+logger = LOGGER.getChild(__name__)
 
+tmpdir = tempfile.mkdtemp(prefix="cve-bin-tool-FUZZ_PYTHON_REQUIREMENTS")
+atexit.register(shutil.rmtree, tmpdir, ignore_errors=True)
 
 EXPECTED_VALID_FILES = {
     "pom.xml": [JavaParser],
@@ -29,6 +43,7 @@
     "composer.lock": [PhpParser],
     "cpanfile": [PerlParser],
     "pubspec.lock": [DartParser],
+    ".env": [EnvParser],
 }
 
 
@@ -40,3 +55,52 @@ async def test_parser_match_filenames_results_in_correct_valid_files(self):
             actual_valid_files,
             "Expected registered file types not the same as loaded file types, second dict is actual file types loaded, first is expected",
         )
+
+    @pytest.mark.asyncio
+    async def test_python_requirements_parser_run_checker(self):
+        # NOTE(review): `data` and `MessageToDict` are not defined in the hunks
+        # shown here; confirm they are in scope before enabling this test.
+        try:
+            json_data = MessageToDict(
+                data, preserving_proto_field_name=True, including_default_value_fields=True
+            )
+            file_path = os.path.join(tmpdir, "requirements.txt")
+            with open(file_path, "w") as f:
+                for package in json_data.get("packages", []):
+                    extras = ""
+                    if package["extras"]:
+                        extras = f"[{','.join(package['extras'])}]"
+
+                    constraint = ""
+                    if "version" in package:
+                        constraint = f" == {package['version']}"
+                    elif "url" in package:
+                        constraint = f"@{package['url']}"
+
+                    f.write(f"{package['name']}{extras}{constraint}\n")
+
+            PRP = PythonRequirementsParser(cve_db, logger)
+            # run_checker is a generator: it does nothing until it is consumed.
+            list(PRP.run_checker(file_path))
+
+        except SystemExit:
+            return
+
+
+def main():
+    # NOTE(review): atheris, atheris_libprotobuf_mutator, python_requirements_pb2
+    # and TestParseData are not imported or defined in the hunks shown here.
+    try:
+        atheris_libprotobuf_mutator.Setup(
+            sys.argv,
+            lambda data: TestParseData(data, cve_db, logger, tmpdir),
+            proto=python_requirements_pb2.PackageList,
+        )
+        atheris.Fuzz()
+    finally:
+        if os.path.exists(tmpdir):
+            shutil.rmtree(tmpdir)
+
+
+if __name__ == "__main__":
+    main()