Skip to content

Commit

Permalink
feat: documentation on out of tree parsers
Browse files Browse the repository at this point in the history
Signed-off-by: John Andersen <[email protected]>
  • Loading branch information
John Andersen authored and pdxjohnny committed Jun 18, 2024
1 parent 020caac commit 757ee07
Show file tree
Hide file tree
Showing 9 changed files with 554 additions and 3 deletions.
9 changes: 9 additions & 0 deletions cve_bin_tool/cvedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from __future__ import annotations

import asyncio
import contextlib
import datetime
import json
import logging
Expand Down Expand Up @@ -1193,3 +1194,11 @@ def fetch_from_mirror(self, mirror, pubkey, ignore_signature, log_signature_erro
else:
self.clear_cached_data()
return -1

@contextlib.contextmanager
def with_cursor(self):
    """
    Context manager that opens the database and hands out a cursor.

    Yields the cursor returned by ``db_open_and_get_cursor()``. Once the
    cursor has been obtained, ``db_close()`` is called when the ``with`` body
    exits, whether it finishes normally or raises.
    """
    db_cursor = self.db_open_and_get_cursor()
    try:
        yield db_cursor
    finally:
        # Runs on both normal exit and exception so the handle is released.
        self.db_close()
1 change: 1 addition & 0 deletions cve_bin_tool/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"php",
"perl",
"dart",
"env",
]


Expand Down
132 changes: 132 additions & 0 deletions cve_bin_tool/parsers/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

import dataclasses
import pathlib
import re

from packageurl import PackageURL

from cve_bin_tool.parsers import Parser
from cve_bin_tool.util import ProductInfo, ScanInfo


@dataclasses.dataclass
class EnvNamespaceConfig:
    """
    Values describing one ad-hoc CVE and the product it applies to.

    All fields are plain strings; only ``location`` has a default.
    """

    # Identifier under which the ad-hoc CVE is recorded.
    ad_hoc_cve_id: str
    # Vendor / product / version triple the CVE is attached to.
    vendor: str
    product: str
    version: str
    # Where the product is reported to live; placeholder path when not given.
    location: str = dataclasses.field(default="/usr/local/bin/product")


@dataclasses.dataclass
class EnvConfig:
    """Container for the per-namespace configuration read from a .env file."""

    # Maps each namespace name to the EnvNamespaceConfig built for it.
    namespaces: dict[str, EnvNamespaceConfig]


class EnvParser(Parser):
    """
    Parser for ``.env`` files that declare ad-hoc CVEs.

    Variables are named ``CVE_BIN_TOOL_<NAMESPACE>_<FIELD>``. All variables
    sharing a ``<NAMESPACE>`` are combined into one ``EnvNamespaceConfig``
    whose fields are the lower-cased ``<FIELD>`` names. For every namespace an
    ad-hoc CVE is written to the CVE database and a ``ScanInfo`` is yielded for
    the declared product.
    """

    PARSER_MATCH_FILENAMES = [
        ".env",
    ]

    @staticmethod
    def parse_file_contents(contents):
        """
        Build an ``EnvConfig`` from the text of a ``.env`` file.

        Only lines starting with ``CVE_BIN_TOOL_`` (after stripping surrounding
        whitespace) are considered; everything else is ignored.

        Args:
            contents (str): Full text of the file.
        Returns:
            EnvConfig: One ``EnvNamespaceConfig`` per namespace found.
        Raises:
            ValueError: If a ``CVE_BIN_TOOL_`` line has no ``=`` or its name
                has no ``<NAMESPACE>_<FIELD>`` part.
            TypeError: If a namespace lacks a required field or sets a field
                that ``EnvNamespaceConfig`` does not define.
        """
        prefix = "CVE_BIN_TOOL_"
        raw_namespaces = {}
        for raw_line in contents.replace("\r\n", "\n").split("\n"):
            # Stripping first keeps trailing blanks from defeating the quote
            # handling below.
            line = raw_line.strip()
            if not line.startswith(prefix):
                continue
            name, equals, value = line.partition("=")
            name = name.strip()
            namespace, underscore, field = name[len(prefix) :].partition("_")
            if not equals or not underscore:
                # Report the variable name only; values may be sensitive.
                raise ValueError(
                    f"Malformed .env entry {name!r}: expected "
                    f"{prefix}<NAMESPACE>_<FIELD>=<value>"
                )
            value = value.strip()
            # Drop one optional leading and one optional trailing double quote.
            if value.startswith('"'):
                value = value[1:]
            if value.endswith('"'):
                value = value[:-1]
            raw_namespaces.setdefault(namespace, {})[field.lower()] = value
        return EnvConfig(
            namespaces={
                namespace: EnvNamespaceConfig(**fields)
                for namespace, fields in raw_namespaces.items()
            }
        )

    def run_checker(self, filename):
        """
        Record the ad-hoc CVEs declared in a .env file and report their products.

        Args:
            filename (str): The path to the .env file.
        Yields:
            ScanInfo: One entry per namespace declared in the file.
        """
        self.filename = filename
        contents = pathlib.Path(self.filename).read_text(encoding="utf-8")

        env_config = self.parse_file_contents(contents)
        entries = list(env_config.namespaces.values())
        if not entries:
            # Nothing declared for this parser: do not open the database just
            # to write zero rows.
            return

        data_source = "environment"
        affected_data = [
            {
                "cve_id": cve.ad_hoc_cve_id,
                "vendor": cve.vendor,
                "product": cve.product,
                # TODO Version MUST be unique to this bug!
                "version": cve.version,
                "versionStartIncluding": "",
                # "versionStartIncluding": cve.version,
                "versionStartExcluding": "",
                "versionEndIncluding": "",
                # "versionEndIncluding": cve.version,
                "versionEndExcluding": "",
            }
            for cve in entries
        ]
        severity_data = [
            {
                "ID": cve.ad_hoc_cve_id,
                # TODO severity
                "severity": "LOW",
                # TODO description
                "description": "TODO",
                # TODO score
                "score": 0,
                # TODO CVSS_version
                "CVSS_version": 3,
                # TODO CVSS_vector
                "CVSS_vector": "",
                "last_modified": "",
            }
            for cve in entries
        ]

        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)

        file_path = pathlib.Path(filename).resolve()
        for cve in entries:
            yield ScanInfo(
                ProductInfo(
                    cve.vendor,
                    cve.product,
                    cve.version,
                    cve.location,
                    PackageURL(
                        type="ad-hoc",
                        namespace=cve.vendor,
                        # Keep only characters that are safe in a PURL name.
                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                        version=cve.version,
                        qualifiers={},
                        subpath=None,
                    ),
                ),
                file_path,
            )
2 changes: 1 addition & 1 deletion cve_bin_tool/parsers/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def load_valid_files() -> dict[str, list[type[Parser]]]:
return valid_files


valid_files = load_valid_files()
valid_files: dict[str, list[type[Parser]]] = load_valid_files()


def parse(filename, output, cve_db, logger):
Expand Down
179 changes: 179 additions & 0 deletions cve_bin_tool/parsers/static_analysis_bandit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

import dataclasses
import json
import pathlib
import re
import subprocess
import sys
import uuid

from packageurl import PackageURL

from cve_bin_tool.parsers import Parser
from cve_bin_tool.util import ProductInfo, ScanInfo


@dataclasses.dataclass
class BanditNamespaceConfig:
    """One bandit finding, in the shape used to record it as an ad-hoc CVE."""

    # Identifier under which the finding is recorded.
    ad_hoc_cve_id: str
    # Vendor / product / version triple the finding is attached to.
    vendor: str
    product: str
    version: str
    # Filled from the "line_number" entry of the bandit result.
    location: str
    # Filled with the bandit result serialised as JSON.
    description: str
    severity: str
    score: float


@dataclasses.dataclass
class BanditConfig:
    """Container for the findings extracted from one bandit report."""

    # Maps each namespace name to the BanditNamespaceConfig built for it.
    namespaces: dict[str, BanditNamespaceConfig]


class BanditParser(Parser):
    """
    Parser that runs the bandit static analyser on Python source files.

    Every issue bandit reports is written to the CVE database as an ad-hoc CVE
    and reported as a ``ScanInfo``. Vendor, product and version are
    placeholders for now (see the TODO notes in ``parse_bandit_output``).
    """

    PARSER_MATCH_FILENAMES = [
        ".py",
    ]

    @staticmethod
    def parse_bandit_output(contents):
        """
        Convert bandit's JSON report into a ``BanditConfig``.

        Args:
            contents (str | bytes): JSON document printed by ``bandit -f json``.
        Returns:
            BanditConfig: One ``BanditNamespaceConfig`` per reported result,
            keyed ``bandit-<index>``.
        Raises:
            Exception: If the report lists any errors; the message is the
                whole report re-serialised as JSON.
        """
        vendor = "TODO-myvendor"
        product = "TODO-myproduct"
        version = f"v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-{uuid.uuid4()}"

        report = json.loads(contents)

        if report.get("errors", []):
            raise Exception(json.dumps(report))

        namespaces = {}
        for i, result in enumerate(report.get("results", [])):
            # TODO Version is the same when code at location matches code from
            # output (result["issue_text"], result["code"]).

            # TODO Replace UUID with SCITT URN
            # SCITT A.4.2
            ad_hoc_cve_id = f"CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...{i}...qnGmr1o"

            # TODO Sort by something, line? Int of content address?
            namespace = f"bandit-{i}"

            # TODO Take vendor product and version automatically from git repo
            # or installed pypi package meta-info.
            namespaces[namespace] = BanditNamespaceConfig(
                ad_hoc_cve_id=ad_hoc_cve_id,
                vendor=vendor,
                product=product,
                version=version,
                severity="LOW",
                score=0.0,
                # bandit reports the line as an int; the field is a string.
                location=str(result["line_number"]),
                description=json.dumps(result),
            )
        return BanditConfig(namespaces=namespaces)

    def run_checker(self, filename):
        """
        Run bandit on a Python file and report each finding as an ad-hoc CVE.

        Args:
            filename (str): The path to the Python source file.
        Yields:
            ScanInfo: One entry per finding reported by bandit.
        Raises:
            Exception: If the bandit process exits with a non-zero status (the
                message is its stderr output) or its report lists errors.
        """
        file_path = pathlib.Path(filename).resolve()
        cmd = [
            sys.executable,
            "-um",
            "bandit",
            "-f",
            "json",
            "--exit-zero",
            "--",
            # TODO Relative paths? Need top level directory being scanned
            str(file_path),
        ]
        try:
            # stderr must be piped, otherwise CalledProcessError.stderr is None
            # and the failure below would carry no diagnostic text.
            stdout = subprocess.check_output(cmd, stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as error:
            stderr_text = (error.stderr or b"").decode(errors="replace")
            raise Exception(stderr_text) from error

        bandit_config = self.parse_bandit_output(stdout)
        findings = list(bandit_config.namespaces.values())
        if not findings:
            # Clean file: nothing to record and nothing to report.
            return

        # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE
        # by making a request to the policy engine and getting its workflow
        # manifest as output and deriving from that or extend it to return that.
        data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE"

        affected_data = []
        severity_data = []

        for cve in findings:
            affected_data.append(
                {
                    "cve_id": cve.ad_hoc_cve_id,
                    "vendor": cve.vendor,
                    "product": cve.product,
                    # TODO Version MUST be unique to this bug!
                    "version": cve.version,
                    "versionStartIncluding": "",
                    # "versionStartIncluding": cve.version,
                    "versionStartExcluding": "",
                    "versionEndIncluding": "",
                    # "versionEndIncluding": cve.version,
                    "versionEndExcluding": "",
                }
            )
            severity_data.append(
                {
                    "ID": cve.ad_hoc_cve_id,
                    # TODO severity
                    "severity": cve.severity,
                    # TODO description
                    "description": cve.description,
                    # TODO score
                    "score": 0,
                    # TODO CVSS_version
                    "CVSS_version": 3,
                    # TODO CVSS_vector
                    "CVSS_vector": "",
                    # TODO Ideally this comes from bisecting and pinpointing the
                    # bug's introduction to the codebase
                    "last_modified": "",
                }
            )

        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)

        for cve in findings:
            # Build the product info per finding so each ScanInfo carries the
            # location of its own finding.
            product_info = ProductInfo(
                cve.vendor,
                cve.product,
                cve.version,
                cve.location,
                PackageURL(
                    type="ad-hoc",
                    namespace=cve.vendor,
                    # Keep only characters that are safe in a PURL name.
                    name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                    version=cve.version,
                    qualifiers={},
                    subpath=None,
                ),
            )
            yield ScanInfo(product_info, file_path)

        # TODO VEX attached via linked data to ad-hoc CVE-ID
Loading

0 comments on commit 757ee07

Please sign in to comment.