Skip to content

Commit

Permalink
cp cve_bin_tool/parsers/python.py cve_bin_tool/parsers/custom_via_env.py
Browse files Browse the repository at this point in the history
Signed-off-by: John <[email protected]>
  • Loading branch information
pdxjohnny committed Jun 16, 2024
1 parent b08bf91 commit 089c265
Show file tree
Hide file tree
Showing 2 changed files with 242 additions and 1 deletion.
182 changes: 182 additions & 0 deletions cve_bin_tool/parsers/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

import json
import re
import subprocess
from re import MULTILINE, compile, search

from packaging.version import parse as parse_version

from cve_bin_tool.parsers import Parser
from cve_bin_tool.strings import parse_strings
from cve_bin_tool.util import ProductInfo, ScanInfo


class PythonRequirementsParser(Parser):
    """
    Parser for Python requirements files.

    Resolves the packages listed in a requirements file (usually named
    requirements.txt) with ``pip3 install --dry-run --report -`` and looks up
    vendor information for every resolved name/version pair.
    """

    PARSER_MATCH_FILENAMES = [
        "requirements.txt",
    ]

    def __init__(self, cve_db, logger):
        """Initialize the python requirements file parser."""
        super().__init__(cve_db, logger)
        self.purl_pkg_type = "pypi"

    def generate_purl(self, product, vendor, qualifier=None, subpath=None):
        """
        Generate a PURL after normalizing the product name.

        Args:
            product (str): Package name; characters outside ``[a-zA-Z0-9._-]``
                are removed and the remainder is lower-cased.
            vendor: Passed through unchanged to ``Parser.generate_purl``.
            qualifier (dict | None): Passed through to ``Parser.generate_purl``;
                ``None`` (the default) is replaced by a new empty dict.
            subpath: Passed through unchanged to ``Parser.generate_purl``.

        Returns:
            ``None`` when nothing is left of the product name after
            normalization, otherwise the result of ``Parser.generate_purl``.
        """
        # Build the empty dict per call: a ``{}`` default in the signature
        # would be one object shared by every call.
        if qualifier is None:
            qualifier = {}

        product = re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
        if not product:
            return None

        return super().generate_purl(product, vendor, qualifier, subpath)

    def run_checker(self, filename):
        """
        Resolve the requirements file with pip and yield vendor matches.

        Args:
            filename (str): The path to the requirements file.

        Yields:
            The entries returned by ``find_vendor_from_purl`` or, when that
            lookup reports no result, by ``find_vendor``.
        """
        self.filename = filename
        pip_report_cmd = [
            "pip3",
            "install",
            "-r",
            self.filename,
            "--dry-run",
            "--ignore-installed",
            "--report",
            "-",
            "--quiet",
        ]
        try:
            # Probe run: stderr is merged into the captured output so the
            # complete pip output is available for logging on failure.
            subprocess.check_output(pip_report_cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            self.logger.error(e.output)
            # Output will look like:
            # pip 20.0.2 from /usr/lib/python3/dist-packages/pip (python 3.8)
            version_banner = subprocess.check_output(["pip3", "--version"], text=True)
            pip_version = version_banner.split(" ")[1]
            if parse_version(pip_version) < parse_version("22.2"):
                self.logger.error(
                    f"{filename} not scanned: pip --dry-run was unable to get package versions."
                )
                self.logger.error(
                    "pip version >= 22.2 is required to scan Python requirements files."
                )
        else:
            # Second run without the stderr redirect, so only pip's stdout
            # (the JSON report) is handed to the JSON decoder.
            output = subprocess.check_output(pip_report_cmd)
            report = json.loads(output)
            for entry in report["install"]:
                product = entry["metadata"]["name"]
                version = entry["metadata"]["version"]
                purl = self.generate_purl(product, "")
                vendor, result = self.find_vendor_from_purl(purl, version)

                if not result:
                    vendor = self.find_vendor(product, version)

                if vendor is not None:
                    yield from vendor
            self.logger.debug(f"Done scanning file: {self.filename}")


class PythonParser(Parser):
    """
    Parser for Python package metadata files.

    Reads the package name and version from a metadata file (usually named
    PKG-INFO or METADATA) and yields the vendor/product matches found for it.
    """

    PARSER_MATCH_FILENAMES = [
        "PKG-INFO: ",
        "METADATA: ",
    ]

    def __init__(self, cve_db, logger):
        """Initialize the python package metadata parser."""
        super().__init__(cve_db, logger)
        self.purl_pkg_type = "pypi"

    def generate_purl(self, product, vendor, qualifier=None, subpath=None):
        """
        Generate a PURL after normalizing the product name.

        Args:
            product (str): Package name; characters outside ``[a-zA-Z0-9._-]``
                are removed and the remainder is lower-cased.
            vendor: Passed through unchanged to ``Parser.generate_purl``.
            qualifier (dict | None): Passed through to ``Parser.generate_purl``;
                ``None`` (the default) is replaced by a new empty dict.
            subpath: Passed through unchanged to ``Parser.generate_purl``.

        Returns:
            ``None`` when nothing is left of the product name after
            normalization, otherwise the result of ``Parser.generate_purl``.
        """
        # Build the empty dict per call: a ``{}`` default in the signature
        # would be one object shared by every call.
        if qualifier is None:
            qualifier = {}

        product = re.sub(r"[^a-zA-Z0-9._-]", "", product).lower()
        if not product:
            return None

        return super().generate_purl(product, vendor, qualifier, subpath)

    def run_checker(self, filename):
        """
        This generator runs only for python packages.
        There are no actual checkers.
        The ProductInfo is computed without the help of any checkers from PKG-INFO or METADATA.

        Args:
            filename (str): Path to the PKG-INFO / METADATA file.

        Yields:
            The entries returned by ``find_vendor_from_purl`` and, when that
            lookup reports no result, one ``ScanInfo`` per vendor/product pair
            known to the CVE database for the product name.
        """
        self.filename = filename
        contents = parse_strings(self.filename)
        # Only the first three lines are searched for the Name/Version fields.
        header = "\n".join(contents.splitlines()[:3])
        try:
            # ``search`` returns None when a field is missing; the resulting
            # AttributeError on ``.group`` is handled below.
            product = search(compile(r"^Name: (.+)$", MULTILINE), header).group(1)
            version = search(compile(r"^Version: (.+)$", MULTILINE), header).group(1)
            purl = self.generate_purl(product, "")
            vendor, result = self.find_vendor_from_purl(purl, version)

            if vendor is not None:
                yield from vendor

            if not result:
                vendor_package_pair = self.cve_db.get_vendor_product_pairs(product)
                if vendor_package_pair:
                    for pair in vendor_package_pair:
                        vendor = pair["vendor"]
                        location = pair.get("location", "/usr/local/bin/product")
                        file_path = self.filename
                        self.logger.debug(
                            f"{file_path} is {vendor}.{product} {version}"
                        )
                        yield ScanInfo(
                            ProductInfo(vendor, product, version, location), file_path
                        )

        # There are packages with a METADATA file in them containing different data from what the tool expects
        except AttributeError:
            self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")
        self.logger.debug(f"Done scanning file: {filename}")
61 changes: 60 additions & 1 deletion test/test_parsers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
import pytest
import os
import sys
import shutil
import atexit
import tempfile
import unittest

import pytest

from cve_bin_tool.cvedb import CVEDB
from cve_bin_tool.log import LOGGER
from cve_bin_tool.parsers.parse import valid_files as actual_valid_files
from cve_bin_tool.parsers.dart import DartParser
from cve_bin_tool.parsers.go import GoParser
Expand All @@ -13,7 +21,13 @@
from cve_bin_tool.parsers.ruby import RubyParser
from cve_bin_tool.parsers.rust import RustParser
from cve_bin_tool.parsers.swift import SwiftParser
from cve_bin_tool.parsers.env import EnvParser

# Module-level CVE database handle and logger, passed to the parser
# constructed in the tests below.
cve_db = CVEDB()
logger = LOGGER.getChild(__name__)

# Scratch directory for generated requirements files; removed at interpreter
# exit. ``ignore_errors`` keeps the exit hook quiet if the directory has
# already been deleted by then.
tmpdir = tempfile.mkdtemp(prefix="cve-bin-tool-FUZZ_PYTHON_REQUIREMENTS")
atexit.register(shutil.rmtree, tmpdir, ignore_errors=True)

EXPECTED_VALID_FILES = {
"pom.xml": [JavaParser],
Expand All @@ -29,6 +43,7 @@
"composer.lock": [PhpParser],
"cpanfile": [PerlParser],
"pubspec.lock": [DartParser],
".env": [EnvParser],
}


Expand All @@ -40,3 +55,47 @@ async def test_parser_match_filenames_results_in_correct_valid_files(self):
actual_valid_files,
"Expected registered file types not the same as loaded file types, second dict is actual file types loaded, first is expected",
)

@pytest.mark.asyncio
async def test_parser_match_filenames_results_in_correct_valid_files(self):
    """
    Write a requirements.txt built from a package list and run the
    requirements parser over it.

    Each package becomes one ``name[extras]<constraint>`` line, where the
    constraint is `` == <version>`` when a version is given, ``@<url>`` when
    only a url is given, and empty otherwise.
    """
    # NOTE(review): `data`, `MessageToDict` and `PythonRequirementsParser` are
    # not defined or imported in the visible part of this file - confirm where
    # they are expected to come from.
    # NOTE(review): the preceding test in this file appears to use the same
    # method name; if both live in one class this definition replaces it -
    # consider renaming.
    try:
        json_data = MessageToDict(
            data, preserving_proto_field_name=True, including_default_value_fields=True
        )
        file_path = os.path.join(tmpdir, "requirements.txt")
        with open(file_path, "w") as f:
            for package in json_data.get("packages", []):
                extras = ""
                if len(package["extras"]) > 0:
                    extras = f"[{','.join(package['extras'])}]"

                constraint = ""
                if "version" in package:
                    constraint = f" == {package['version']}"
                elif "url" in package:
                    constraint = f"@{package['url']}"

                f.write(f"{package['name']}{extras}{constraint}\n")

        PRP = PythonRequirementsParser(cve_db, logger)
        # run_checker is a generator function: nothing executes until it is
        # iterated, so drain it to actually exercise the parser.
        for _ in PRP.run_checker(file_path):
            pass

    except SystemExit:
        return


def main():
    """
    Set up the protobuf-mutator fuzzer with the command-line arguments and
    run it, deleting the scratch directory afterwards if it still exists.
    """

    def _fuzz_one_input(data):
        # Per-input callback handed to the fuzzer setup.
        return TestParseData(data, cve_db, logger, tmpdir)

    try:
        atheris_libprotobuf_mutator.Setup(
            sys.argv,
            _fuzz_one_input,
            proto=python_requirements_pb2.PackageList,
        )
        atheris.Fuzz()
    finally:
        # Runs whether or not fuzzing raised.
        if os.path.exists(tmpdir):
            shutil.rmtree(tmpdir)


if __name__ == "__main__":
    main()

0 comments on commit 089c265

Please sign in to comment.