Skip to content

Commit

Permalink
feat: documentation on out of tree parsers
Browse files Browse the repository at this point in the history
Signed-off-by: John <[email protected]>

feat: bandit adhoc cves

nodemon -e py --exec "clear; cve-bin-tool -u never --log debug --format json --detailed -- cve_bin_tool/parsers/static_analysis_bandit.py; sh -c 'cat $(ls output.* | tail -n 1) | jq'; test 1"

Signed-off-by: John Andersen <[email protected]>
  • Loading branch information
John Andersen authored and pdxjohnny committed Jun 18, 2024
1 parent fadb9ca commit 1197881
Show file tree
Hide file tree
Showing 8 changed files with 577 additions and 1 deletion.
9 changes: 9 additions & 0 deletions cve_bin_tool/cvedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import asyncio
import datetime
import contextlib
import json
import logging
import shutil
Expand Down Expand Up @@ -1193,3 +1194,11 @@ def fetch_from_mirror(self, mirror, pubkey, ignore_signature, log_signature_erro
else:
self.clear_cached_data()
return -1

@contextlib.contextmanager
def with_cursor(self):
    """
    Context manager that opens the database and hands out a cursor.

    Yields:
        The cursor returned by ``self.db_open_and_get_cursor()``.

    ``self.db_close()`` is called when the ``with`` block is left, whether it
    finished normally or raised.
    """
    with contextlib.ExitStack() as cleanup:
        db_cursor = self.db_open_and_get_cursor()
        # Registered only after the open succeeded, so a failed open never
        # triggers a close.
        cleanup.callback(self.db_close)
        yield db_cursor
1 change: 1 addition & 0 deletions cve_bin_tool/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"php",
"perl",
"dart",
"env",
]


Expand Down
148 changes: 148 additions & 0 deletions cve_bin_tool/parsers/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

import json
import re
import pathlib
import subprocess
import contextlib
import dataclasses
from re import MULTILINE, compile, search

from packageurl import PackageURL
from packaging.version import parse as parse_version

from cve_bin_tool.parsers import Parser
from cve_bin_tool.strings import parse_strings
from cve_bin_tool.util import ProductInfo, ScanInfo


@dataclasses.dataclass
class EnvNamespaceConfig:
    """
    Settings of one ``CVE_BIN_TOOL_<NAMESPACE>_*`` group found in a .env file.

    Field names are the lower-cased ``<FIELD>`` suffixes of the variables.
    """

    # Identifier under which the ad-hoc CVE is stored in the CVE database.
    ad_hoc_cve_id: str
    # Vendor / product / version triple the ad-hoc CVE applies to.
    vendor: str
    product: str
    version: str
    # Location reported for the product when the .env file does not set one.
    location: str = dataclasses.field(default="/usr/local/bin/product")


@dataclasses.dataclass
class EnvConfig:
    """Everything parsed from a single .env file."""

    # Maps each namespace name to the settings collected for it.
    namespaces: dict[str, EnvNamespaceConfig]


class EnvParser(Parser):
    """
    Parser for ``.env`` files that declare ad-hoc CVEs.

    Variables are named ``CVE_BIN_TOOL_<NAMESPACE>_<FIELD>``. Every namespace
    describes one ad-hoc CVE (the fields are those of ``EnvNamespaceConfig``).
    The declared CVEs are written to the CVE database and a PURL (Package URL)
    of type ``ad-hoc`` is generated for each affected product.
    """

    PARSER_MATCH_FILENAMES = [
        ".env",
    ]

    @staticmethod
    def parse_file_contents(contents):
        """
        Parse the text of a .env file into an ``EnvConfig``.

        Only lines that start with ``CVE_BIN_TOOL_`` are considered. One
        leading and one trailing double quote are removed from each value.

        Args:
            contents (str): Full text of the .env file.
        Returns:
            EnvConfig: One ``EnvNamespaceConfig`` per namespace in the file.
        Raises:
            ValueError: If a ``CVE_BIN_TOOL_`` line has no ``=`` or its name
                is not of the form ``CVE_BIN_TOOL_<NAMESPACE>_<FIELD>``.
            TypeError: If a namespace sets an unknown field or omits a
                required one.
        """
        prefix = "CVE_BIN_TOOL_"
        namespaces = {}
        for line in contents.replace("\r\n", "\n").split("\n"):
            if not line.startswith(prefix):
                continue
            name, equals, value = line.partition("=")
            namespace, underscore, field = name[len(prefix) :].partition("_")
            if not equals or not underscore:
                raise ValueError(
                    f"Malformed {prefix}<NAMESPACE>_<FIELD>=<value> line: {line!r}"
                )
            value = value.removeprefix('"').removesuffix('"')
            namespaces.setdefault(namespace, {})[field.lower()] = value
        return EnvConfig(
            namespaces={
                namespace: EnvNamespaceConfig(**fields)
                for namespace, fields in namespaces.items()
            }
        )

    def run_checker(self, filename):
        """
        Record the ad-hoc CVEs of a .env file and yield the affected products.

        Existing ``cve_range`` rows of each declared product are deleted
        before the new severity and affected-version data is inserted.

        Args:
            filename (str): The path to the .env file.
        Yields:
            ScanInfo: One entry per namespace declared in the file.
        """
        self.filename = filename
        contents = pathlib.Path(self.filename).read_text()

        env_config = self.parse_file_contents(contents)
        cves = list(env_config.namespaces.values())

        # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE
        # by making a request to the policy engine and getting its workflow
        # manifest as output and deriving from that or extend it to return that.
        data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE"
        affected_data = [
            {
                "cve_id": cve.ad_hoc_cve_id,
                "vendor": cve.vendor,
                "product": cve.product,
                # TODO Version MUST be unique to this bug!
                "version": cve.version,
                "versionStartIncluding": "",
                # "versionStartIncluding": cve.version,
                "versionStartExcluding": "",
                "versionEndIncluding": "",
                # "versionEndIncluding": cve.version,
                "versionEndExcluding": "",
            }
            for cve in cves
        ]
        severity_data = [
            {
                "ID": cve.ad_hoc_cve_id,
                # TODO severity
                "severity": "LOW",
                # TODO description
                "description": "TODO",
                # TODO score
                "score": 0,
                # TODO CVSS_version
                "CVSS_version": 3,
                # TODO CVSS_vector
                "CVSS_vector": "",
                # TODO Ideally this comes from bisecting and pinpointing the
                # bug's introduction to the codebase
                "last_modified": "",
            }
            for cve in cves
        ]

        with self.cve_db.with_cursor() as cursor:
            for cve in cves:
                # The product name comes straight from the scanned file, so
                # it is passed as a bound parameter instead of being
                # formatted into the SQL text.
                cursor.execute(
                    "DELETE from cve_range where product=?", (cve.product,)
                )
        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)

        file_path = pathlib.Path(filename).resolve()
        for cve in cves:
            yield ScanInfo(
                ProductInfo(
                    cve.vendor,
                    cve.product,
                    cve.version,
                    cve.location,
                    PackageURL(
                        type="ad-hoc",
                        namespace=cve.vendor,
                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                        version=cve.version,
                        qualifiers={},
                        subpath=None,
                    ),
                ),
                file_path,
            )

        # TODO VEX attached via linked data to ad-hoc CVE-ID

# TODO VEX attached via linked data to ad-hoc CVE-ID
183 changes: 183 additions & 0 deletions cve_bin_tool/parsers/static_analysis_bandit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

import sys
import json
import uuid
import re
import pathlib
import subprocess
import contextlib
import dataclasses
from re import MULTILINE, compile, search

from packageurl import PackageURL
from packaging.version import parse as parse_version

from cve_bin_tool.parsers import Parser
from cve_bin_tool.strings import parse_strings
from cve_bin_tool.util import ProductInfo, ScanInfo


@dataclasses.dataclass
class BanditNamespaceConfig:
    """Ad-hoc CVE record built from a single Bandit finding."""

    # Identifier under which the finding is stored in the CVE database.
    ad_hoc_cve_id: str
    # Vendor / product / version triple the finding is attributed to.
    vendor: str
    product: str
    version: str
    # Where the finding was reported (filled from Bandit's "line_number").
    location: str
    # Text stored as the CVE description.
    description: str
    # Severity label and numeric score of the finding.
    severity: str
    score: float


@dataclasses.dataclass
class BanditConfig:
    """All findings parsed from one Bandit JSON report."""

    # Maps a generated namespace name ("bandit-<index>") to its finding.
    namespaces: dict[str, BanditNamespaceConfig]


class BanditParser(Parser):
    """
    Parser that runs the Bandit static analyzer on Python source files.

    Every Bandit finding is recorded in the CVE database as an ad-hoc CVE
    against a placeholder vendor/product and a per-scan unique version, and
    the product is reported with a PURL (Package URL) of type ``ad-hoc``.
    """

    PARSER_MATCH_FILENAMES = [
        ".py",
    ]

    @staticmethod
    def parse_bandit_output(contents):
        """
        Convert Bandit's JSON report into a ``BanditConfig``.

        Args:
            contents (str | bytes): JSON document produced by
                ``bandit -f json``.
        Returns:
            BanditConfig: One ``BanditNamespaceConfig`` per entry of the
            report's ``results`` list, keyed ``bandit-<index>``.
        Raises:
            Exception: If the report has a non-empty ``errors`` list; the
                message is the whole report re-serialized as JSON.
            KeyError: If a result lacks ``issue_text``, ``code`` or
                ``line_number``.
        """
        vendor = "TODO-myvendor"
        product = "TODO-myproduct"
        # A fresh UUID makes the version unique for every scan.
        version = f"v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-{uuid.uuid4()}"

        report = json.loads(contents)

        if report.get("errors"):
            raise Exception(json.dumps(report))

        namespaces = {}
        for i, result in enumerate(report.get("results", [])):
            # TODO Version is the same when code at location matches code
            # from output. Until that is implemented these lookups only
            # check that Bandit reported both fields (KeyError otherwise).
            result["issue_text"]
            result["code"]

            # TODO Replace UUID with SCITT URN
            # SCITT A.4.2
            ad_hoc_cve_id = f"CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...{i}...qnGmr1o"

            # TODO Sort by something, line? Int of content address?
            namespace = f"bandit-{i}"

            # TODO Take vendor product and version automatically from git repo
            # or installed pypi package meta-info.
            namespaces[namespace] = BanditNamespaceConfig(
                ad_hoc_cve_id=ad_hoc_cve_id,
                vendor=vendor,
                product=product,
                version=version,
                severity="LOW",
                score=0.0,
                # The field is declared as ``str``; Bandit reports an integer.
                location=str(result["line_number"]),
                description=json.dumps(result),
            )
        return BanditConfig(namespaces=namespaces)

    def run_checker(self, filename):
        """
        Run Bandit on a Python file and report its findings as ad-hoc CVEs.

        Args:
            filename (str): The path to the Python file to analyze.
        Yields:
            ScanInfo: One entry per Bandit finding, all sharing the same
            product information. Nothing is yielded, and the database is
            left untouched, when Bandit reports no findings.
        Raises:
            Exception: If the Bandit process exits with a non-zero status
                (the message is its captured stderr) or its report
                contains errors.
        """
        file_path = pathlib.Path(filename).resolve()
        cmd = [
            sys.executable,
            "-um",
            "bandit",
            "-f",
            "json",
            "--exit-zero",
            "--",
            # TODO Relative paths? Need top level directory being scanned
            str(file_path),
        ]
        try:
            # stderr has to be captured, otherwise ``error.stderr`` below is
            # always None and the failure reason is lost.
            stdout = subprocess.check_output(cmd, stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as error:
            raise Exception(error.stderr.decode(errors="replace")) from error

        bandit_config = self.parse_bandit_output(stdout)
        cves = list(bandit_config.namespaces.values())
        if not cves:
            # Without findings there is nothing to store or report; the
            # product information below needs at least one finding.
            return

        # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE
        # by making a request to the policy engine and getting its workflow
        # manifest as output and deriving from that or extend it to return that.
        data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE"

        affected_data = [
            {
                "cve_id": cve.ad_hoc_cve_id,
                "vendor": cve.vendor,
                "product": cve.product,
                # TODO Version MUST be unique to this bug!
                "version": cve.version,
                "versionStartIncluding": "",
                # "versionStartIncluding": cve.version,
                "versionStartExcluding": "",
                "versionEndIncluding": "",
                # "versionEndIncluding": cve.version,
                "versionEndExcluding": "",
            }
            for cve in cves
        ]
        severity_data = [
            {
                "ID": cve.ad_hoc_cve_id,
                # TODO severity
                "severity": cve.severity,
                # TODO description
                "description": cve.description,
                # TODO score
                "score": 0,
                # TODO CVSS_version
                "CVSS_version": 3,
                # TODO CVSS_vector
                "CVSS_vector": "",
                # TODO Ideally this comes from bisecting and pinpointing the
                # bug's introduction to the codebase
                "last_modified": "",
            }
            for cve in cves
        ]

        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)

        # Vendor, product and version are identical for every finding of a
        # scan; the location is taken from the last finding.
        last_cve = cves[-1]
        product_info = ProductInfo(
            last_cve.vendor,
            last_cve.product,
            last_cve.version,
            last_cve.location,
            PackageURL(
                type="ad-hoc",
                namespace=last_cve.vendor,
                name=re.sub(r"[^a-zA-Z0-9._-]", "", last_cve.product).lower(),
                version=last_cve.version,
                qualifiers={},
                subpath=None,
            ),
        )
        for _cve in cves:
            yield ScanInfo(product_info, file_path)

        # TODO VEX attached via linked data to ad-hoc CVE-ID

# TODO VEX attached via linked data to ad-hoc CVE-ID
Loading

0 comments on commit 1197881

Please sign in to comment.