From d1518d32f52f8e4da417590da73844d62669d317 Mon Sep 17 00:00:00 2001 From: Stephan Lipp Date: Fri, 27 Oct 2023 14:42:03 +0200 Subject: [PATCH] refactor(sast): add sanity checks in main SAST routine --- sast-fuzz/static_analysis/sast/config.yml | 6 +- .../static_analysis/sast/src/sfa/__init__.py | 29 ++-- .../sast/src/sfa/analysis/tool_runner.py | 125 +++++++++++------- 3 files changed, 92 insertions(+), 68 deletions(-) diff --git a/sast-fuzz/static_analysis/sast/config.yml b/sast-fuzz/static_analysis/sast/config.yml index 9cde425..e191b13 100644 --- a/sast-fuzz/static_analysis/sast/config.yml +++ b/sast-fuzz/static_analysis/sast/config.yml @@ -5,12 +5,14 @@ scoring: tools: 1.0 tools: flawfinder: + sanity_checks: 'always' # Options: always, cmake, none path: 'python2 /opt/flawfinder-2.0.19/flawfinder.py' checks: - '--falsepositive' - '--minlevel=3' - '--neverignore' semgrep: + sanity_checks: 'always' # Options: always, cmake, none path: '/usr/local/bin/semgrep' checks: - 'r/c.lang.security.double-free.double-free' @@ -27,6 +29,7 @@ tools: - 'r/c.lang.security.use-after-free.use-after-free' num_threads: 8 infer: + sanity_checks: 'always' # Options: always, cmake, none path: '/opt/infer-1.1.0/bin/infer' checks: - '--no-default-checkers' @@ -41,7 +44,7 @@ tools: - '--uninit' num_threads: 8 codeql: - sanity_checks: 'cmake' # Options: always, cmake, none + sanity_checks: 'always' # Options: always, cmake, none lib_path: '/opt/codeql-2.12.0/lib' path: '/opt/codeql-2.12.0/cli/codeql' checks: @@ -264,6 +267,7 @@ tools: - '%LIBRARY_PATH%/cpp/ql/src/Summary/LinesOfUserCode.ql' num_threads: 8 clang_scan: + sanity_checks: 'always' # Options: always, cmake, none path: '/opt/llvm-12.0.0/build/bin/scan-build' checks: - '-disable-checker core.CallAndMessage' diff --git a/sast-fuzz/static_analysis/sast/src/sfa/__init__.py b/sast-fuzz/static_analysis/sast/src/sfa/__init__.py index 5ecfb7f..fae18cc 100644 --- a/sast-fuzz/static_analysis/sast/src/sfa/__init__.py +++ b/sast-fuzz/static_analysis/sast/src/sfa/__init__.py @@ -19,22 +19,11 @@ import yaml - -# fmt: off -class SanityChecks(Enum): - """ - Options of when to run sanity checks. - """ - - ALWAYS = auto(); CMAKE = auto(); NONE = auto() -# fmt: on - - ScoreWeights = namedtuple("ScoreWeights", ["flags", "tools"], defaults=[0.5, 0.5]) # SAST tool configuration SASTToolConfig = namedtuple( - "SASTToolConfig", ["sanity_checks", "path", "checks", "num_threads"], defaults=[SanityChecks.NONE, "", "", -1] + "SASTToolConfig", ["sanity_checks", "path", "checks", "num_threads"], defaults=["", "", "", -1] ) @@ -70,27 +59,33 @@ def from_yaml(cls, file: Path) -> "AppConfig": return cls( ScoreWeights(config["scoring"]["weights"]["flags"], config["scoring"]["weights"]["tools"]), flawfinder=SASTToolConfig( - SanityChecks.NONE, config["tools"]["flawfinder"]["path"], config["tools"]["flawfinder"]["checks"], -1 + config["tools"]["flawfinder"]["sanity_checks"], + config["tools"]["flawfinder"]["path"], + config["tools"]["flawfinder"]["checks"], + -1, ), semgrep=SASTToolConfig( - SanityChecks.NONE, + config["tools"]["semgrep"]["sanity_checks"], config["tools"]["semgrep"]["path"], config["tools"]["semgrep"]["checks"], config["tools"]["semgrep"]["num_threads"], ), infer=SASTToolConfig( - SanityChecks.NONE, + config["tools"]["infer"]["sanity_checks"], config["tools"]["infer"]["path"], config["tools"]["infer"]["checks"], config["tools"]["infer"]["num_threads"], ), codeql=SASTToolConfig( - SanityChecks[config["tools"]["codeql"]["sanity_checks"].upper()], + config["tools"]["codeql"]["sanity_checks"], config["tools"]["codeql"]["path"], codeql_checks, config["tools"]["codeql"]["num_threads"], ), clang_scan=SASTToolConfig( - SanityChecks.NONE, config["tools"]["clang_scan"]["path"], config["tools"]["clang_scan"]["checks"], -1 + config["tools"]["clang_scan"]["sanity_checks"], + config["tools"]["clang_scan"]["path"], + config["tools"]["clang_scan"]["checks"], + -1, ), ) diff --git a/sast-fuzz/static_analysis/sast/src/sfa/analysis/tool_runner.py b/sast-fuzz/static_analysis/sast/src/sfa/analysis/tool_runner.py index a052ba6..bd0d471 100644 --- a/sast-fuzz/static_analysis/sast/src/sfa/analysis/tool_runner.py +++ b/sast-fuzz/static_analysis/sast/src/sfa/analysis/tool_runner.py @@ -23,7 +23,7 @@ from tempfile import TemporaryDirectory from typing import Callable, ClassVar, Dict, Optional -from sfa import SanityChecks, SASTToolConfig +from sfa import SASTToolConfig from sfa.analysis import SASTFlag, SASTFlags from sfa.utils.fs import copy_dir, find_files from sfa.utils.proc import run_shell_command @@ -57,12 +57,11 @@ def is_cmake_project(subject_dir: Path) -> bool: return (subject_dir / "CMakeLists.txt").exists() -def convert_sarif(string: str, sanity_check: Optional[Callable[[Dict], None]] = None) -> SASTFlags: +def default_sarif_checks(string: str) -> Dict: """ - Convert SARIF data into our SAST flag format. + Run default checks on SARIF string. :param string: - :param sanity_check: :return: """ if len(string.strip()) == 0: @@ -73,8 +72,17 @@ def convert_sarif(string: str, sanity_check: Optional[Callable[[Dict], None]] = if sarif_data["version"] != SARIF_VERSION: raise ValueError(f"SARIF version {sarif_data['version']} is not supported.") - if sanity_check is not None: - sanity_check(sarif_data) + return sarif_data + + +def convert_sarif(string: str) -> SASTFlags: + """ + Convert SARIF data into our SAST flag format. + + :param string: + :return: + """ + sarif_data = json.loads(string) flags = SASTFlags() @@ -129,6 +137,16 @@ def _analyze(self, working_dir: Path) -> str: """ pass + @abstractmethod + def _sanity_checks(self, string: str) -> None: + """ + Run sanity checks on SAST tool output. + + :param string: + :return: + """ + pass + @abstractmethod def _format(self, string: str) -> SASTFlags: """ @@ -141,7 +159,7 @@ def _format(self, string: str) -> SASTFlags: def run(self) -> SASTFlags: """ - Setup target program, run SAST tool, and format output. + Setup target program, run SAST tool (+ sanity checks), and format output. :return: """ @@ -150,6 +168,11 @@ def run(self) -> SASTFlags: working_dir = self._setup(Path(temp_dir)) flags = self._analyze(working_dir) + if self._config.sanity_checks == "always" or ( + self._config.sanity_checks == "cmake" and self._is_cmake_project + ): + self._sanity_checks(flags) + return self._format(flags) except Exception as ex: @@ -172,6 +195,9 @@ def _analyze(self, working_dir: Path) -> str: f"{self._config.path} --dataonly --sarif {' '.join(self._config.checks)} {working_dir}" ) + def _sanity_checks(self, string: str) -> None: + default_sarif_checks(string) + def _format(self, string: str) -> SASTFlags: return convert_sarif(string) @@ -189,6 +215,9 @@ def _analyze(self, working_dir: Path) -> str: f"{self._config.path} scan --quiet --sarif --jobs {self._config.num_threads} {' '.join([f'--config {check}' for check in self._config.checks])} {working_dir}" ) + def _sanity_checks(self, string: str) -> None: + default_sarif_checks(string) + def _format(self, string: str) -> SASTFlags: return convert_sarif(string) @@ -220,6 +249,9 @@ def _analyze(self, working_dir: Path) -> str: # By default, Infer writes the results into the 'report.json' file once the analysis is complete. return (working_dir / "report.json").read_text() + def _sanity_checks(self, string: str) -> None: + pass + def _format(self, string: str) -> SASTFlags: flags = SASTFlags() @@ -241,39 +273,6 @@ class CodeQLRunner(SASTToolRunner): CodeQL runner. """ - def _sanity_check(self, sarif_data: Dict) -> None: - """ - Run sanity checks on CodeQL output. - - :param sarif_data: - :return: - """ - n_runs = len(sarif_data["runs"]) - - if n_runs == 0: - raise ValueError("No CodeQL execution runs found.") - - # Let's take the last executed SAST run for the sanity check - run = sarif_data["runs"][n_runs - 1] - metrics = run["properties"].get("metricResults") - - if metrics is None: - raise ValueError("No CodeQL metrics data found in SARIF file.") - - loc = 0 - user_loc = 0 - - for m in metrics: - if m["ruleId"] == "cpp/summary/lines-of-code": - loc = int(m["value"]) - if m["ruleId"] == "cpp/summary/lines-of-user-code": - user_loc = int(m["value"]) - - logging.debug(f"Sanity-Check [CodeQL]: LoC = {loc}, User-LoC = {user_loc}") - - if user_loc == 0: - raise ValueError("No user C/C++ source code found in the CodeQL database.") - def _setup(self, temp_dir: Path) -> Path: result_dir = temp_dir / "codeql_res" @@ -303,18 +302,37 @@ def _analyze(self, working_dir: Path) -> str: return result_file.read_text() - def _format(self, string: str) -> SASTFlags: - run_sc = False + def _sanity_checks(self, string: str) -> None: + sarif_data = default_sarif_checks(string) - if self._config.sanity_checks == SanityChecks.ALWAYS or ( - self._config.sanity_checks == SanityChecks.CMAKE and self._is_cmake_project - ): - run_sc = True + n_runs = len(sarif_data["runs"]) - if not run_sc: - return convert_sarif(string) - else: - return convert_sarif(string, self._sanity_check) + if n_runs == 0: + raise ValueError("No CodeQL execution runs found.") + + # Let's take the last executed SAST run for the sanity check + run = sarif_data["runs"][n_runs - 1] + metrics = run["properties"].get("metricResults") + + if metrics is None: + raise ValueError("No CodeQL metrics data found in SARIF file.") + + loc = 0 + user_loc = 0 + + for m in metrics: + if m["ruleId"] == "cpp/summary/lines-of-code": + loc = int(m["value"]) + if m["ruleId"] == "cpp/summary/lines-of-user-code": + user_loc = int(m["value"]) + + logging.debug(f"Sanity-Check [CodeQL]: LoC = {loc}, User-LoC = {user_loc}") + + if user_loc == 0: + raise ValueError("No user C/C++ source code found in the CodeQL database.") + + def _format(self, string: str) -> SASTFlags: + return convert_sarif(string) class ClangScanRunner(SASTToolRunner): @@ -340,6 +358,10 @@ def _analyze(self, working_dir: Path) -> str: # (JSON string) of each file as one line to the return string. return os.linesep.join(map(lambda file: json.dumps(json.loads(file.read_text()), indent=None), result_files)) + def _sanity_checks(self, string: str) -> None: + for sarif_str in string.split(os.linesep): + default_sarif_checks(sarif_str) + def _format(self, string: str) -> SASTFlags: nested_flags = map(convert_sarif, string.split(os.linesep)) @@ -369,6 +391,9 @@ def _setup(self, temp_dir: Path) -> Path: def _analyze(self, working_dir: Path) -> str: return (working_dir / self._report_name).read_text() + def _sanity_checks(self, string: str) -> None: + pass + def _format(self, string: str) -> SASTFlags: flags = SASTFlags()