diff --git a/cve_bin_tool/parsers/__init__.py b/cve_bin_tool/parsers/__init__.py index 843b210fb1..1e39cc33f5 100644 --- a/cve_bin_tool/parsers/__init__.py +++ b/cve_bin_tool/parsers/__init__.py @@ -12,6 +12,7 @@ from cve_bin_tool.util import ProductInfo, ScanInfo __all__ = [ + "parse", "Parser", "java", "javascript", diff --git a/cve_bin_tool/parsers/dart.py b/cve_bin_tool/parsers/dart.py index b84847395a..88f30d866a 100644 --- a/cve_bin_tool/parsers/dart.py +++ b/cve_bin_tool/parsers/dart.py @@ -15,6 +15,10 @@ class DartParser(Parser): https://dart.dev/overview """ + PARSER_MATCH_FILENAMES = [ + "pubspec.lock", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "pub" diff --git a/cve_bin_tool/parsers/go.py b/cve_bin_tool/parsers/go.py index d94751a725..ab556c84c9 100644 --- a/cve_bin_tool/parsers/go.py +++ b/cve_bin_tool/parsers/go.py @@ -25,6 +25,10 @@ class GoParser(Parser): """ + PARSER_MATCH_FILENAMES = [ + "go.mod", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "golang" diff --git a/cve_bin_tool/parsers/java.py b/cve_bin_tool/parsers/java.py index 9677c49b3c..f31f9182a6 100644 --- a/cve_bin_tool/parsers/java.py +++ b/cve_bin_tool/parsers/java.py @@ -13,6 +13,10 @@ class JavaParser(Parser): """Class to handle parsing Java-based Packages.""" + PARSER_MATCH_FILENAMES = [ + "pom.xml", + ] + def __init__(self, cve_db, logger, validate=True): super().__init__(cve_db, logger) self.validate = validate diff --git a/cve_bin_tool/parsers/javascript.py b/cve_bin_tool/parsers/javascript.py index 023eddc8d3..6332f4b7cc 100644 --- a/cve_bin_tool/parsers/javascript.py +++ b/cve_bin_tool/parsers/javascript.py @@ -11,6 +11,10 @@ class JavascriptParser(Parser): """Parser for javascript's package-lock.json files""" + PARSER_MATCH_FILENAMES = [ + "package-lock.json", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "npm" diff --git 
a/cve_bin_tool/parsers/parse.py b/cve_bin_tool/parsers/parse.py index acb8fc328a..ec1e87985f 100644 --- a/cve_bin_tool/parsers/parse.py +++ b/cve_bin_tool/parsers/parse.py @@ -1,40 +1,45 @@ # Copyright (C) 2022 Intel Corporation # SPDX-License-Identifier: GPL-3.0-or-later +import sys -from cve_bin_tool.parsers.dart import DartParser -from cve_bin_tool.parsers.go import GoParser -from cve_bin_tool.parsers.java import JavaParser -from cve_bin_tool.parsers.javascript import JavascriptParser -from cve_bin_tool.parsers.perl import PerlParser -from cve_bin_tool.parsers.php import PhpParser -from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser -from cve_bin_tool.parsers.r import RParser -from cve_bin_tool.parsers.ruby import RubyParser -from cve_bin_tool.parsers.rust import RustParser -from cve_bin_tool.parsers.swift import SwiftParser - -valid_files = { - "pom.xml": JavaParser, - "package-lock.json": JavascriptParser, - "Cargo.lock": RustParser, - "renv.lock": RParser, - "requirements.txt": PythonRequirementsParser, - "go.mod": GoParser, - "PKG-INFO: ": PythonParser, - "METADATA: ": PythonParser, - "Gemfile.lock": RubyParser, - "Package.resolved": SwiftParser, - "composer.lock": PhpParser, - "cpanfile": PerlParser, - "pubspec.lock": DartParser, -} + +if sys.version_info >= (3, 10): + from importlib import metadata as importlib_metadata +else: + import importlib_metadata + +from cve_bin_tool.parsers import Parser + + +PARSERS_ENTRYPOINT = "cve_bin_tool.parsers" + + +def load_valid_files() -> dict[str, list[type[Parser]]]: + """Loads file parsers""" + valid_files = {} + for entrypoint in importlib_metadata.entry_points().select( + group=PARSERS_ENTRYPOINT + ): + parser_cls = entrypoint.load() + for match_filename in getattr(parser_cls, "PARSER_MATCH_FILENAMES", []): + valid_files.setdefault(match_filename, []) + valid_files[match_filename].append(parser_cls) + for match_filename in valid_files: + valid_files[match_filename] = 
list(set(valid_files[match_filename])) + return valid_files + + +valid_files = load_valid_files() def parse(filename, output, cve_db, logger): """ Parses the given filename using the appropriate parser. """ + parsers = [] for file in list(valid_files.keys()): if file in output: - parser = valid_files[file](cve_db, logger) - yield from parser.run_checker(filename) + for valid_file_parser in valid_files[file]: + parsers.append(valid_file_parser(cve_db, logger)) + for parser in parsers: + yield from parser.run_checker(filename) diff --git a/cve_bin_tool/parsers/perl.py b/cve_bin_tool/parsers/perl.py index 90d6160cf8..df1012a677 100644 --- a/cve_bin_tool/parsers/perl.py +++ b/cve_bin_tool/parsers/perl.py @@ -9,6 +9,10 @@ class PerlParser(Parser): """Parser for perl's cpan files""" + PARSER_MATCH_FILENAMES = [ + "cpanfile", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "cpan" diff --git a/cve_bin_tool/parsers/php.py b/cve_bin_tool/parsers/php.py index a486d4e5ce..fabe5843cc 100644 --- a/cve_bin_tool/parsers/php.py +++ b/cve_bin_tool/parsers/php.py @@ -14,6 +14,10 @@ class PhpParser(Parser): generate PURLs (Package URLs) for the listed packages. """ + PARSER_MATCH_FILENAMES = [ + "composer.lock", + ] + def __init__(self, cve_db, logger): """Initialize the PhpParser.""" super().__init__(cve_db, logger) diff --git a/cve_bin_tool/parsers/python.py b/cve_bin_tool/parsers/python.py index 534cb55285..ec6b2b1716 100644 --- a/cve_bin_tool/parsers/python.py +++ b/cve_bin_tool/parsers/python.py @@ -20,6 +20,10 @@ class PythonRequirementsParser(Parser): requirements.txt) and generate PURLs (Package URLs) for the listed packages. """ + PARSER_MATCH_FILENAMES = [ + "requirements.txt", + ] + def __init__(self, cve_db, logger): """Initialize the python requirements file parser.""" super().__init__(cve_db, logger) @@ -114,6 +118,11 @@ class PythonParser(Parser): PKG-INFO or METADATA) and generate PURLs (Package URLs) for the package. 
""" + PARSER_MATCH_FILENAMES = [ + "PKG-INFO: ", + "METADATA: ", + ] + def __init__(self, cve_db, logger): """Initialize the python package metadata parser.""" super().__init__(cve_db, logger) diff --git a/cve_bin_tool/parsers/r.py b/cve_bin_tool/parsers/r.py index cbaac0d548..e162f956e9 100644 --- a/cve_bin_tool/parsers/r.py +++ b/cve_bin_tool/parsers/r.py @@ -26,6 +26,10 @@ class RParser(Parser): """ + PARSER_MATCH_FILENAMES = [ + "renv.lock", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "cran" diff --git a/cve_bin_tool/parsers/ruby.py b/cve_bin_tool/parsers/ruby.py index b59b53dd2c..7f21fcf6e2 100644 --- a/cve_bin_tool/parsers/ruby.py +++ b/cve_bin_tool/parsers/ruby.py @@ -25,6 +25,10 @@ class RubyParser(Parser): """ + PARSER_MATCH_FILENAMES = [ + "Gemfile.lock", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "gem" diff --git a/cve_bin_tool/parsers/rust.py b/cve_bin_tool/parsers/rust.py index 46e1c00fd1..3b34b5fcc5 100644 --- a/cve_bin_tool/parsers/rust.py +++ b/cve_bin_tool/parsers/rust.py @@ -24,6 +24,10 @@ class RustParser(Parser): Parse the Rust dependency file and yield valid PURLs for the packages listed in the file. 
@classmethod
def available_language_checkers(cls) -> list[str]:
    """Return the sorted, de-duplicated names of the known language parsers.

    The parser classes are gathered from the values of the module-level
    ``valid_files`` mapping (presumably the pattern -> parser-classes table
    from ``cve_bin_tool.parsers.parse`` -- confirm the import in this file).

    Each class is reported by its ``__name__``. Calling ``str()`` on a class
    yields its ``<class 'package.module.Name'>`` repr, which is not a usable
    checker name in log output.

    Returns:
        Alphabetically sorted list of parser class names.
    """
    parser_classes = itertools.chain.from_iterable(valid_files.values())
    return sorted({parser_cls.__name__ for parser_cls in parser_classes})
def enumerate_entry_points_parsers(package_root=None):
    """Scan ``cve_bin_tool/parsers/`` to determine the parser entry points.

    Every ``*.py`` file in that directory except ``__init__.py`` is read as
    text (nothing is imported) and searched for ``class`` statements that
    start at the beginning of a line. Each class found becomes one entry.

    Args:
        package_root: Directory containing the ``cve_bin_tool`` package.
            Defaults to ``PACKAGE_ROOT_PATH``, the directory of this file.

    Returns:
        A dict mapping the entry-point name ``"<module>.<Class>"`` to the
        object reference ``"cve_bin_tool.parsers.<module>:<Class>"``.
    """
    root = PACKAGE_ROOT_PATH if package_root is None else pathlib.Path(package_root)
    parsers_dir = root.joinpath("cve_bin_tool", "parsers")
    parsers = {}
    # Sorted so the generated entry-point list does not depend on the order
    # in which the filesystem happens to enumerate the directory.
    for path in sorted(parsers_dir.glob("*.py")):
        if path.stem == "__init__":
            continue
        # Build the dotted module path from path components rather than by
        # replacing the OS separator in the string form.
        module_path = ".".join(path.relative_to(root).with_suffix("").parts)
        # Explicit encoding: the default is locale-dependent, and this file
        # already reads the README as UTF-8.
        contents = path.read_text(encoding="utf-8")
        for re_match in re.finditer(r"^class (\w+)", contents, re.MULTILINE):
            parser_cls_name = re_match[1]
            entry_point_name = ".".join([path.stem, parser_cls_name])
            parsers[entry_point_name] = ":".join([module_path, parser_cls_name])
    return parsers
class TestParsers:
    """Checks the parser table that is assembled from entry points."""

    def test_parser_match_filenames_results_in_correct_valid_files(self):
        """The loaded ``valid_files`` table must equal ``EXPECTED_VALID_FILES``.

        This is a plain synchronous test: nothing here is awaited, so making
        it a coroutine would only tie it to an async pytest plugin.
        """
        unittest.TestCase().assertDictEqual(
            EXPECTED_VALID_FILES,
            actual_valid_files,
            "Expected registered file types not the same as loaded file types, second dict is actual file types loaded, first is expected",
        )